def train(**params):
    print("** Loading training images")
    start = time.time()
    lr_hr_ds, n_data = data.load_train_dataset(params['lr_dir'],
                                               params['hr_dir'], params['ext'],
                                               params['batch_size'])
    val_lr_hr_ds, n_val_data = data.load_test_dataset(params['val_lr_dir'],
                                                      params['val_hr_dir'],
                                                      params['val_ext'],
                                                      params['val_batch_size'])
    print("Finish loading images in %.2fs" % (time.time() - start))

    one_gpu_model, gpu_model = prepare_model(**params)

    exp_folder = make_exp_folder(params['exp_dir'], params['arc'])
    save_params(exp_folder, **params)
    tb_callback = make_tb_callback(exp_folder)
    lr_callback = make_lr_callback(params['lr_init'], params['lr_decay'],
                                   params['lr_decay_at_steps'])
    cp_callback = make_cp_callback(exp_folder, one_gpu_model)

    gpu_model.fit(lr_hr_ds,
                  epochs=params['epochs'],
                  steps_per_epoch=num_iter_per_epoch(n_data,
                                                     params['batch_size']),
                  callbacks=[tb_callback, cp_callback, lr_callback],
                  initial_epoch=params['init_epoch'],
                  validation_data=val_lr_hr_ds,
                  validation_steps=n_val_data)

    one_gpu_model.save_weights(os.path.join(exp_folder, 'final_model.h5'))

    K.clear_session()
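
The save_params(exp_folder, **params) helper called above is not part of this excerpt. A
minimal sketch of a compatible helper, assuming it only needs to serialize the run's
hyperparameters into the experiment folder (the params.json file name and the JSON format
are assumptions):

import json
import os

def save_params(exp_folder, **params):
    # Persist the hyperparameters next to the checkpoints so the run can be
    # reproduced later; values that are not JSON-serializable are stringified.
    with open(os.path.join(exp_folder, 'params.json'), 'w') as f:
        json.dump(params, f, indent=2, default=str)
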
Example #2
    def train(self, fname, dataset, sess_info, epochs):
        (sess, saver) = sess_info
        f = open_file(fname)
        iterep = 500
        for i in range(iterep * epochs):
            batch = dataset.train.next_batch(100)
            sess.run(self.train_step, feed_dict={'x:0': batch})

            progbar(i, iterep)
            if (i + 1) % iterep == 0:
                a, b = sess.run(
                    [self.nent, self.loss],
                    feed_dict={
                        'x:0':
                        dataset.train.data[np.random.choice(
                            len(dataset.train.data), 200)]
                    })
                c, d = sess.run([self.nent, self.loss],
                                feed_dict={'x:0': dataset.test.data})
                a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
                e = test_acc(dataset, sess, self.qy_logit)
                string = (
                    '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                        'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc',
                        'epoch'))
                stream_print(f, string, i <= iterep)
                string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                          .format(a, b, c, d, e, int((i + 1) / iterep)))
                stream_print(f, string)
            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
        if f is not None: f.close()
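
The save_params(saver, sess, epoch) call above is defined elsewhere in that project; a
minimal sketch, assuming it simply writes an indexed tf.train.Saver checkpoint (the
checkpoint path below is an assumption):

def save_params(saver, sess, epoch):
    # Write an indexed checkpoint so training can be resumed from this epoch.
    saver.save(sess, 'checkpoints/model.ckpt', global_step=epoch)
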
Example #3
  def run(self):
    """ Script for initializing training/testing process """

    global_step = tf.Variable(0, trainable=False)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    deconv_mult = lambda grads: list(map(lambda x: (x[0] * 1.0, x[1]) if 'deconv' in x[1].name else x, grads))
    grads = deconv_mult(optimizer.compute_gradients(self.loss))
    self.train_op = optimizer.apply_gradients(grads, global_step=global_step)

    tf.global_variables_initializer().run()

    # Load a previous model checkpoint if one exists
    if self.load():
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if self.params:
      save_params(self.sess, self.model.model_params)
    elif self.train:
      # Train and test run sequentially
      self.run_train()
      self.run_test()
    else:
      self.run_test()
Example #4
def main(num_epochs=20):

    print("Building model and compiling functions...")
    input_var = T.tensor4('inputs')
    fcae = build_fcae(input_var)

    output = nn.layers.get_output(fcae['output'])
    output_det = nn.layers.get_output(fcae['output'], deterministic=True)
    loss = nn.objectives.binary_crossentropy(output, input_var).mean()
    test_loss = nn.objectives.binary_crossentropy(output_det, input_var).mean()

    # ADAM updates
    params = nn.layers.get_all_params(fcae['output'], trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=1e-3)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], nn.layers.get_output(fcae['output']))

    data = u.DataH5PyStreamer(os.path.join(c.external_data, 'mnist.hdf5'),
                              batch_size=128)
    hist = u.train_with_hdf5(
        data,
        num_epochs=num_epochs,
        train_fn=train_fn,
        test_fn=val_fn,
        max_per_epoch=40,
        tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=0.),
        te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=0.))

    u.save_params(fcae['output'],
                  'fcae_params_{}.npz'.format(np.asarray(hist)[-1, -1]))

    from PIL import Image
    from matplotlib import pyplot as plt

    streamer = data.streamer()
    imb = next(streamer.get_epoch_iterator())
    batch = u.raw_to_floatX(imb[0], pixel_shift=0.).transpose((0, 1, 3, 2))

    orig_dim = 28
    im = Image.new("RGB", (orig_dim * 20, orig_dim * 20))
    for j in xrange(10):
        dim = orig_dim
        orig_im = Image.fromarray(
            u.get_picture_array(batch,
                                np.random.randint(batch.shape[0]),
                                shift=0.0))
        im.paste(orig_im.resize((2 * orig_dim, 2 * orig_dim), Image.ANTIALIAS),
                 box=(0, j * orig_dim * 2))
        new_im = {}
        for i in xrange(9):
            new_im = orig_im.resize((dim, dim), Image.ANTIALIAS)
            new_im = ae_fn(
                u.arr_from_img(new_im, shift=0.).reshape(1, -1, dim, dim))
            new_im = Image.fromarray(u.get_picture_array(new_im, 0, shift=0.))\
                    .resize((orig_dim*2, orig_dim*2), Image.ANTIALIAS)
            im.paste(new_im, box=((i + 1) * orig_dim * 2, j * orig_dim * 2))
            dim = int(dim * 1.2)
    im.save('increasing_size_autoencoded.jpg')
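
u.save_params and u.load_params are utility functions from that project and are not shown
here. A minimal sketch following the standard Lasagne idiom, which is an assumption about
how they actually work:

import numpy as np
import lasagne as nn

def save_params(output_layer, fname):
    # Store every parameter array below output_layer, in order, in one .npz file.
    np.savez(fname, *nn.layers.get_all_param_values(output_layer))

def load_params(output_layer, fname):
    # Restore the parameters in the same order they were saved.
    with np.load(fname) as f:
        values = [f['arr_%d' % i] for i in range(len(f.files))]
    nn.layers.set_all_param_values(output_layer, values)
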
Example #5
    def train(self, params, train, dev, test):
        start_time = time.time()
        counter = 0
        try:
            for eidx in xrange(params.epochs):

                kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True)

                uidx = 0
                for _, train_index in kf:

                    uidx += 1

                    batch = [train[t] for t in train_index]
                    vocab = self.get_word_arr(batch)
                    y = self.get_y(batch)
                    x, xmask = self.prepare_data(self.populate_embeddings_words(batch, vocab))
                    idxs = self.get_idxs(xmask)

                    if params.nntype == "charlstm" or params.nntype == "charcnn":
                        char_indices = self.populate_embeddings_characters(vocab)
                    if params.nntype == "charagram":
                        char_hash = self.populate_embeddings_characters_charagram(vocab)

                    if params.nntype == "charlstm":
                        c, cmask = self.prepare_data(char_indices)
                    if params.nntype == "charcnn":
                        c = self.prepare_data_conv(char_indices)

                    if params.nntype == "charlstm":
                        cost = self.train_function(c, cmask, x, xmask, idxs, y)
                    if params.nntype == "charcnn":
                        cost = self.train_function(c, x, xmask, idxs, y)
                    if params.nntype == "charagram":
                        cost = self.train_function(char_hash, x, xmask, idxs, y)

                    if np.isnan(cost) or np.isinf(cost):
                        print 'NaN detected'

                    #print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost

                if(params.save):
                    counter += 1
                    utils.save_params(self, params.outfile+str(counter)+'.pickle')

                if(params.evaluate):
                    devscore = self.evaluate(dev, params)
                    testscore = self.evaluate(test, params)
                    trainscore = self.evaluate(train, params)
                    print "accuracy: ", devscore, testscore, trainscore

                print 'Epoch ', (eidx+1), 'Cost ', cost

        except KeyboardInterrupt:
            print "Training interrupted"

        end_time = time.time()
        print "total time:", (end_time - start_time)
Example #6
File: model.py  Project: talkhouli/sockeye
    def save_params_to_file(self, fname: str):
        """
        Saves model parameters to file.

        :param fname: Path to save parameters to.
        """
        assert self._is_built
        utils.save_params(self.params.copy(), fname)
        logging.info('Saved params to "%s"', fname)
Example #7
def main(num_epochs = 20):

    print("Building model and compiling functions...")
    input_var = T.tensor4('inputs')
    fcae = build_fcae(input_var)

    output = nn.layers.get_output(fcae['output'])
    output_det = nn.layers.get_output(fcae['output'], deterministic=True)
    loss = nn.objectives.binary_crossentropy(output, input_var).mean()
    test_loss = nn.objectives.binary_crossentropy(output_det, input_var).mean()

    # ADAM updates
    params = nn.layers.get_all_params(fcae['output'], trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=1e-3)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], nn.layers.get_output(fcae['output']))

    data = u.DataH5PyStreamer(os.path.join(c.external_data, 'mnist.hdf5'), batch_size=128)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs,
            train_fn = train_fn, test_fn = val_fn,
            max_per_epoch=40,
            tr_transform = lambda x: u.raw_to_floatX(x[0], pixel_shift=0.),
            te_transform = lambda x: u.raw_to_floatX(x[0], pixel_shift=0.))

    u.save_params(fcae['output'], 'fcae_params_{}.npz'.format(np.asarray(hist)[-1,-1]))

    from PIL import Image
    from matplotlib import pyplot as plt

    streamer = data.streamer()
    imb = next(streamer.get_epoch_iterator())
    batch = u.raw_to_floatX(imb[0], pixel_shift=0.).transpose((0,1,3,2))

    orig_dim = 28
    im = Image.new("RGB", (orig_dim*20, orig_dim*20))
    for j in xrange(10):
        dim = orig_dim
        orig_im = Image.fromarray(u.get_picture_array(batch,
            np.random.randint(batch.shape[0]), shift=0.0))
        im.paste(orig_im.resize((2*orig_dim, 2*orig_dim), Image.ANTIALIAS),
                box=(0,j*orig_dim*2))
        new_im = {}
        for i in xrange(9):
            new_im = orig_im.resize((dim, dim), Image.ANTIALIAS)
            new_im = ae_fn(u.arr_from_img(new_im, shift=0.).reshape(1,-1,dim,dim))
            new_im = Image.fromarray(u.get_picture_array(new_im, 0, shift=0.))\
                    .resize((orig_dim*2, orig_dim*2), Image.ANTIALIAS)
            im.paste(new_im, box=((i+1)*orig_dim*2, j*orig_dim*2))
            dim = int(dim * 1.2)
    im.save('increasing_size_autoencoded.jpg')
Example #8
def main(specstr=default_specstr, z_dim=256, num_epochs=10, ch=3, init_from='',
        img_size=64, pxsh=0.5, data_file='', batch_size=8, save_to='params'):

    # build expressions for the output, loss, gradient
    input_var = T.tensor4('inputs')
    print('building specstr {} - zdim {}'.format(specstr, z_dim))
    cae = m.build_cae_nopoolinv(input_var, shape=(img_size,img_size), channels=ch, 
            specstr=specstr.format(z_dim))
    l_list = nn.layers.get_all_layers(cae)
    pred = nn.layers.get_output(cae)
    loss = nn.objectives.squared_error(pred, input_var.flatten(2)).mean()
    params = nn.layers.get_all_params(cae, trainable=True)
    grads = nn.updates.total_norm_constraint(T.grad(loss, params), 10)
    updates = nn.updates.adam(grads, params, learning_rate=1e-3)
    te_pred = nn.layers.get_output(cae, deterministic=True)
    te_loss = nn.objectives.squared_error(te_pred, input_var.flatten(2)).mean()

    # training functions
    print('compiling functions')
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], te_loss)

    # compile functions for encode/decode to test later
    enc_layer = l_list[next(i for i in xrange(len(l_list)) if l_list[i].name=='encode')]
    enc_fn = theano.function([input_var], nn.layers.get_output(enc_layer, deterministic=True))
    dec_fn = lambda z: nn.layers.get_output(cae, deterministic=True,
        inputs={l_list[0]:np.zeros((z.shape[0],ch,img_size,img_size),dtype=theano.config.floatX),
            enc_layer:z}).eval().reshape(-1,ch,img_size,img_size)

    # load params if requested, run training
    if len(init_from) > 0:
        print('loading params from {}'.format(init_from))
        load_params(cae, init_from)
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    print('training for {} epochs'.format(num_epochs))
    hist = u.train_with_hdf5(data, num_epochs=num_epochs,
        train_fn = train_fn,
        test_fn = val_fn,
        tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=False),
        te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    tg = u.raw_to_floatX(imb, pixel_shift=pxsh, square=True, center=True)
    pr = dec_fn(enc_fn(tg))
    for i in range(pr.shape[0]):
        u.get_image_pair(tg, pr,index=i,shift=pxsh).save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('cae_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(cae, os.path.join(save_to, 'cae_{}.npz'.format(hist[-1,-1])))
Example #9
    def callback_fn(self, params):
        print("Function value: ", end='')
        loss = objective_function(params, self.X, self.y, self.lambd)
        print(loss)

        print("Average gradient: ", end='')
        avg_grad = np.mean(
            d_optimization_function(params, self.X, self.y, self.lambd)**2)
        print(avg_grad)
        print()

        self.iters += 1
        save_params(params, self.filename, self.iters)
        save_losses(loss, self.loss_filename, self.iters)
Example #10
  def run(self):
    self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)

    tf.global_variables_initializer().run()

    if self.load():
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if self.params:
      save_params(self.sess, self.model.weights, self.model.biases, self.model.alphas, self.model.model_params)
    elif self.train:
      self.run_train()
    else:
      self.run_test()
Example #11
def evaluate(eval_dir, method, train_features, train_labels, test_features,
             test_labels, **kwargs):
    if method == 'svm':
        acc_train, acc_test = svm(train_features, train_labels, test_features,
                                  test_labels)
    elif method == 'knn':
        acc_train, acc_test = knn(train_features, train_labels, test_features,
                                  test_labels, **kwargs)
    elif method == 'nearsub':
        acc_train, acc_test = nearsub(train_features, train_labels,
                                      test_features, test_labels, **kwargs)
    elif method == 'nearsub_pca':
        acc_train, acc_test = knn(train_features, train_labels, test_features,
                                  test_labels, **kwargs)
    acc_dict = {'train': acc_train, 'test': acc_test}
    utils.save_params(eval_dir, acc_dict, name=f'acc_{method}')
Example #12
  def run(self):
    # SGD with momentum
    # self.train_op = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.loss)
    self.train_op = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
    tf.initialize_all_variables().run()

    if self.load(self.checkpoint_dir):
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if self.params:
      save_params(self.sess, self.weights, self.biases)
    elif self.train:
      self.run_train()
    else:
      self.run_test()
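
save_params(self.sess, self.weights, self.biases) comes from that project's utilities; a
minimal compatible sketch, assuming weights and biases are dicts of tf.Variable objects
and that a single NumPy .npz archive is an acceptable output (file name and format are
assumptions):

import numpy as np

def save_params(sess, weights, biases, fname='params.npz'):
    # Evaluate every variable in the running session and store the resulting
    # arrays under prefixed dictionary keys.
    arrays = {'w_' + name: sess.run(var) for name, var in weights.items()}
    arrays.update({'b_' + name: sess.run(var) for name, var in biases.items()})
    np.savez(fname, **arrays)
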
Example #13
    def run(self):
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

        tf.initialize_all_variables().run()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        if self.params:
            s, d, m = self.model_params
            save_params(self.sess, self.weights, self.biases, self.alphas, s,
                        d, m)
        elif self.train:
            self.run_train()
        else:
            self.run_test()
Example #14
def main(L=2, img_size=64, pxsh=0., z_dim=32, n_hid=1024, num_epochs=12, binary='True',
        init_from='', data_file='', batch_size=128, save_to='params', max_per_epoch=-1):
    binary = binary.lower()=='true'

    # Create VAE model
    input_var = T.tensor4('inputs')
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
           m.build_vcae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid)
        
    if len(init_from) > 0:
        print('loading from {}'.format(init_from))
        load_params(l_x, init_from)

    # compile functions
    loss, _ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
            binary=binary, L=L)
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=3e-5)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    print('training for {} epochs'.format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=val_fn,
            max_per_epoch=max_per_epoch,
            tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=False),
            te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_images = u.raw_to_floatX(imb, pixel_shift=pxsh)
    autoencoded_images = ae_fn(orig_images)
    for i in range(autoencoded_images.shape[0]):
        u.get_image_pair(orig_images, autoencoded_images, index=i, shift=pxsh) \
                .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vcae_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vcae_{}.npz'.format(hist[-1,-1])))
Example #15
def train(params=None):
    os.makedirs(params['ckpt_path'], exist_ok=True)

    device = torch.device("cuda")

    train_dataset = HDRDataset(params['dataset'],
                               params=params,
                               suffix=params['dataset_suffix'])
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True)

    model = HDRPointwiseNN(params=params)
    ckpt = get_latest_ckpt(params['ckpt_path'])
    if ckpt:
        print('Loading previous state:', ckpt)
        state_dict = torch.load(ckpt)
        state_dict, _ = load_params(state_dict)
        model.load_state_dict(state_dict)
    model.to(device)

    mseloss = torch.nn.MSELoss()
    optimizer = Adam(model.parameters(), params['lr'])

    count = 0
    for e in range(params['epochs']):
        model.train()
        for i, (low, full, target) in enumerate(train_loader):
            optimizer.zero_grad()

            low = low.to(device)
            full = full.to(device)
            t = target.to(device)
            res = model(low, full)

            total_loss = mseloss(res, t)
            total_loss.backward()

            if (count + 1) % params['log_interval'] == 0:
                _psnr = psnr(res, t).item()
                loss = total_loss.item()
                print(e, count, loss, _psnr)

            optimizer.step()
            if (count + 1) % params['ckpt_interval'] == 0:
                print('@@ MIN:', torch.min(res), 'MAX:', torch.max(res))
                model.eval().cpu()
                ckpt_model_filename = "ckpt_" + str(e) + '_' + str(
                    count) + ".pth"
                ckpt_model_path = os.path.join(params['ckpt_path'],
                                               ckpt_model_filename)
                state = save_params(model.state_dict(), params)
                torch.save(state, ckpt_model_path)
                test(ckpt_model_path)
                model.to(device).train()
            count += 1
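
Here save_params bundles the hyperparameter dict into the checkpoint before torch.save,
and load_params splits it back out. A minimal sketch under that assumption (the 'params'
key is an assumption):

def save_params(state_dict, params):
    # Attach the run's hyperparameters to the checkpoint so a later load can
    # rebuild the model with the same configuration.
    state = dict(state_dict)
    state['params'] = dict(params)
    return state

def load_params(state_dict):
    # Split a loaded checkpoint back into model weights and hyperparameters.
    state = dict(state_dict)
    params = state.pop('params', {})
    return state, params
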
Example #16
    def run(self):
        # SGD with momentum
        # self.train_op = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.loss)
        # Now we use the Adam optimizer instead of SGD (stochastic gradient descent)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
        tf.initialize_all_variables().run()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        if self.params:
            save_params(self.sess, self.weights, self.biases)
        elif self.train:
            self.run_train()
        else:
            self.run_test()
Example #17
  def run(self):
    global_step = tf.Variable(0, trainable=False)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    deconv_mult = lambda grads: list(map(lambda x: (x[0] * 1.0, x[1]) if 'deconv' in x[1].name else x, grads))
    grads = deconv_mult(optimizer.compute_gradients(self.loss))
    self.train_op = optimizer.apply_gradients(grads, global_step=global_step)

    tf.global_variables_initializer().run()

    if self.load():
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if self.params:
      save_params(self.sess, self.model.model_params)
    elif self.train:
      self.run_train()
    else:
      self.run_test()
Example #18
    def train(self):
        num_minibatches = len(self.mnist.train.x) / self.minibatch_size

        for epoch in xrange(self.num_epochs):
            for mb_index in xrange(num_minibatches):
                mb_x = self.mnist.train.x \
                         [mb_index : mb_index+self.minibatch_size]
                mb_x = mb_x.reshape((self.minibatch_size, 1, 28, 28))

                #E_h = crbm.E_h_given_x_func(mb_x)
                #print "Shape of E_h", E_h.shape

                cd_return = self.crbm.CD_step(mb_x)
                sp_return = self.crbm.sparsity_step(mb_x)

                self.series['cd'].append( \
                        (epoch, mb_index), cd_return)
                self.series['sparsity'].append( \
                        (epoch, mb_index), sp_return)

                total_idx = epoch*num_minibatches + mb_index

                if (total_idx+1) % REDUCE_EVERY == 0:
                    self.series['params'].append( \
                        (epoch, mb_index), self.crbm.params)

                if total_idx % VISUALIZE_EVERY == 0:
                    self.visualize_gibbs_result(\
                        mb_x, GIBBS_STEPS_IN_VIZ_CHAIN,
                        "gibbs_chain_"+str(epoch)+"_"+str(mb_index))
                    self.visualize_gibbs_result(mb_x, 1,
                        "gibbs_1_"+str(epoch)+"_"+str(mb_index))
                    self.visualize_filters(
                        "filters_"+str(epoch)+"_"+str(mb_index))
            if TEST_CONFIG:
                # do a single epoch for the cluster test config
                break

        if SAVE_PARAMS:
            utils.save_params(self.crbm.params, "params.pkl")
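
utils.save_params(self.crbm.params, "params.pkl") suggests a pickle-based dump; a minimal
sketch, assuming self.crbm.params is a list of Theano shared variables (an assumption):

import pickle

def save_params(params, fname):
    # Pickle the current numpy values of the shared variables so they can be
    # reloaded later without rebuilding the Theano graph.
    with open(fname, 'wb') as f:
        pickle.dump([p.get_value() for p in params], f,
                    protocol=pickle.HIGHEST_PROTOCOL)
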
Example #19
 def save_params(self, dir_path="", epoch=None):
     param_saving_path = save_params(dir_path=dir_path,
                                     name=self.name,
                                     epoch=epoch,
                                     params=self.params,
                                     aux_states=self.aux_states)
     misc_saving_path = save_misc(
         dir_path=dir_path,
         epoch=epoch,
         name=self.name,
         content={
             'data_shapes':
             {k: list(map(int, v))
              for k, v in self.data_shapes.items()}
         })
     logging.info('Saving %s, params: \"%s\", misc: \"%s\"', self.name,
                  param_saving_path, misc_saving_path)
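
The save_params called inside this method takes dir_path, name, epoch, params and
aux_states and returns the path it wrote to. A minimal MXNet-style sketch, where the
arg:/aux: key prefixes and the file naming scheme are assumptions:

import os
import mxnet as mx

def save_params(dir_path, name, epoch, params, aux_states):
    # Store parameter and auxiliary-state NDArrays in one file whose name
    # encodes the model name and (optionally) the epoch.
    suffix = '%04d' % epoch if epoch is not None else 'final'
    fname = os.path.join(dir_path, '%s-%s.params' % (name, suffix))
    arrays = {'arg:' + k: v for k, v in params.items()}
    arrays.update({'aux:' + k: v for k, v in aux_states.items()})
    mx.nd.save(fname, arrays)
    return fname
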
Example #20
def index(request):
    db = Database('banco-notes')

    if request.startswith('POST'):
        params = save_params(
            request
        )  # params = {'titulo': something, 'detalhes': something else, 'id': another thing}
        #add_to_jsonfile(params, 'notes.json')
        if 'deletar' in params.keys():
            db.delete(params['deletar'])  #id
        elif params['id'] == 'None':
            db.add(
                Note(title=params['titulo'],
                     content=params['detalhes'],
                     id=params['id']))
        else:
            db.update(
                Note(title=params['titulo'],
                     content=params['detalhes'],
                     id=params['id']))

        return build_response(code=303,
                              reason='See Other',
                              headers='Location: /')

    else:
        # Build a list of <li> elements, one for each note
        print('notas do banco: ', db.get_all())

        note_template = load_template('components/note.html')
        notes_li = [
            note_template.format(id=note.id,
                                 title=note.title,
                                 details=note.content)
            for note in db.get_all()
        ]
        notes = '\n'.join(notes_li)

        print('----------------------------------------')

        return build_response(load_template('index.html').format(notes=notes))
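
In this example save_params plays a different role: it extracts the submitted form fields
from the raw HTTP request. A minimal sketch under that assumption, inferring the field
names from the keys used above:

from urllib.parse import parse_qs

def save_params(request):
    # The form body follows the blank line that separates headers from content;
    # parse it into a flat dict such as {'titulo': ..., 'detalhes': ..., 'id': ...}.
    body = request.split('\r\n\r\n', 1)[-1]
    return {key: values[0] for key, values in parse_qs(body).items()}
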
Example #21
def main(data_name, vae_type, dimZ, dimH, n_iter, batch_size, K, checkpoint):
    dimY = 10

    if vae_type == 'A':
        from conv_generator_mnist_A import generator
    if vae_type == 'B':
        from conv_generator_mnist_B import generator
    if vae_type == 'C':
        from conv_generator_mnist_C import generator
    if vae_type == 'D':
        from conv_generator_mnist_D import generator
    if vae_type == 'E':
        from conv_generator_mnist_E import generator
    if vae_type == 'F':
        from conv_generator_mnist_F import generator
    if vae_type == 'G':
        from conv_generator_mnist_G import generator
    from conv_encoder_mnist import encoder_gaussian as encoder
    shape_high = (28, 28)
    input_shape = (28, 28, 1)
    n_channel = 64

    # then define model
    dec = generator(input_shape, dimH, dimZ, dimY, n_channel, 'sigmoid', 'gen')
    enc, enc_conv, enc_mlp = encoder(input_shape, dimH, dimZ, dimY, n_channel,
                                     'enc')

    # define optimisers
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, ) + input_shape)
    Y_ph = tf.placeholder(tf.float32, shape=(batch_size, dimY))
    ll = 'l2'
    fit, eval_acc = construct_optimizer(X_ph, Y_ph, [enc_conv, enc_mlp], dec,
                                        ll, K, vae_type)

    # load data
    from utils_mnist import data_mnist
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    if not os.path.isdir('save/'):
        os.mkdir('save/')
        print('create path save/')
    path_name = data_name + '_conv_vae_%s/' % (vae_type + '_' + str(dimZ))
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print('create path save/' + path_name)
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print('training from scratch')
        init_variables(sess)
    else:
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # now start fitting
    n_iter_ = min(n_iter, 20)
    beta = 1.0
    for i in range(int(n_iter / n_iter_)):
        fit(sess, X_train, Y_train, n_iter_, lr, beta)
        # print training and test accuracy
        eval_acc(sess, X_test, Y_test, 'test')

    # save param values
    save_params(sess, filename, checkpoint)
    checkpoint += 1
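
load_params and save_params here manage indexed TensorFlow checkpoints under the save/
directory; a minimal sketch, assuming they are thin wrappers around tf.train.Saver (the
optional scope argument mirrors the call in a later example and is otherwise an
assumption):

import tensorflow as tf

def save_params(sess, filename, checkpoint, scope=None):
    # Save the (optionally scoped) global variables under an indexed path,
    # e.g. save/<model>/checkpoint_0, checkpoint_1, ...
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
    saver = tf.train.Saver(var_list)
    saver.save(sess, '%s_%d' % (filename, checkpoint))

def load_params(sess, filename, checkpoint, scope=None):
    # Restore the variables saved at the given checkpoint index.
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
    saver = tf.train.Saver(var_list)
    saver.restore(sess, '%s_%d' % (filename, checkpoint))
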
Example #22
                    type=str,
                    default='',
                    help='extra information to add to folder name')
parser.add_argument(
    '--save_dir',
    type=str,
    default='./saved_models/',
    help='base directory for saving PyTorch model. (default: ./saved_models/)')
args = parser.parse_args()

# pipeline setup
model_dir = os.path.join(
    args.save_dir, "iris", "layers{}_eps{}_eta{}"
    "".format(args.layers, args.eps, args.eta))
os.makedirs(model_dir, exist_ok=True)
utils.save_params(model_dir, vars(args))

# data setup
X_train, y_train, X_test, y_test, num_classes = dataset.load_Iris(0.3)

# model setup
layers = [Vector(args.layers, eta=args.eta, eps=args.eps)]
model = Architecture(layers, model_dir, num_classes)

# train/test pass
print("Forward pass - train features")
Z_train = model(X_train, y_train)
utils.save_loss(model.loss_dict, model_dir, "train")
print("Forward pass - test features")
Z_test = model(X_test)
utils.save_loss(model.loss_dict, model_dir, "test")
Example #23
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       100,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    #word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)  # EMBEDDING
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(
        dev_examples,
        word_dict,
        sort_by_len=not args.test_only,
        concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, args.batch_size,
                           args.concat)
    dev_acc, pred = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y = utils.vectorize(train_examples,
                                                            word_dict,
                                                            concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_x3, train_y,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3,
                  mb_y) in enumerate(all_train):

            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3,
                                  mb_mask3, mb_y)
            if idx % 100 == 0:
                logging.info('#Examples = %d, max_len = %d' %
                             (len(mb_x1), mb_x1.shape[1]))
                logging.info(
                    'Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)'
                    % (epoch, idx, len(all_train), train_loss,
                       time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k]
                     for k in samples], [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples
                     for o in range(4)], [train_y[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file,
                                      all_params,
                                      epoch=epoch,
                                      n_updates=n_updates)
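
utils.save_params in this project also records how far training has progressed; a minimal
sketch, assuming all_params is a list of Theano shared variables and a pickle file is the
target format (both assumptions):

import pickle

def save_params(fname, params, epoch, n_updates):
    # Bundle the current parameter values with bookkeeping about training
    # progress so an interrupted run can be resumed or inspected later.
    state = {
        'params': [p.get_value() for p in params],
        'epoch': epoch,
        'n_updates': n_updates,
    }
    with open(fname, 'wb') as f:
        pickle.dump(state, f, protocol=pickle.HIGHEST_PROTOCOL)
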
Example #24
                    help='load pretrained checkpoint for assigning labels')
parser.add_argument('--pretrain_epo',
                    type=int,
                    default=None,
                    help='load pretrained epoch for assigning labels')
args = parser.parse_args()

## Pipelines Setup
model_dir = os.path.join(
    args.save_dir,
    'seqsupce_{}+{}_cpb{}_epo{}_bs{}_lr{}_mom{}_wd{}_lcr{}{}'.format(
        args.arch, args.data, args.cpb, args.epo, args.bs, args.lr, args.mom,
        args.wd, args.lcr, args.tail))
headers = ["label_batch_id", "epoch", "step", "loss"]
utils.init_pipeline(model_dir, headers)
utils.save_params(model_dir, vars(args))


## per model functions
def lr_schedule(epoch, optimizer):
    """decrease the learning rate"""
    lr = args.lr
    if epoch >= 400:
        lr = args.lr * 0.01
    elif epoch >= 200:
        lr = args.lr * 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


## Prepare for Training
Example #25
learning_rate = 0.01
n_train = len(y_train)
n_iters = args.epochs * n_train
print("Training:", args.epochs, "epochs of", n_train, "iterations")
train_loss = numpy.zeros(n_iters)

start_time = timeit.default_timer()
for epoch in range(args.epochs):
    for i in range(n_train):
        iteration = i + n_train * epoch
        train_loss[iteration] = train_model(
            numpy.asarray(X_train[i], dtype='int32'),
            numpy.asarray(y_train[i], dtype='int32'), learning_rate)
        if (len(train_loss) > 1 and train_loss[-1] > train_loss[-2]):
            learning_rate = learning_rate * 0.5
            print("Setting learning rate to {}".format(learning_rate))
        if iteration % args.print_interval == 0:
            print('epoch {}, minibatch {}/{}, train loss {}'.format(
                epoch, i, n_train, train_loss[iteration]))

numpy.save(
    "train_loss_{}_{}_h{}_e{}".format(args.mode, args.model, args.hidden,
                                      args.epochs), train_loss)
numpy.save("index_{}".format(args.mode), index_)
print(
    "Saved index to index_{}.npy. Saved train loss to train_loss_{}_{}_h{}_e{}.npy"
    .format(args.mode, args.mode, args.model, args.hidden, args.epochs))
save_params(
    "params_{}_{}_h{}_e{}".format(args.mode, args.model, args.hidden,
                                  args.epochs), params)
Example #26
    def train(self,
              fname,
              dataset,
              sess_info,
              epochs,
              save_parameters=True,
              is_labeled=False):
        history = initialize_history()
        (sess, saver) = sess_info
        f = open_file(fname)
        iterep = 500
        for i in range(iterep * epochs):
            batch = dataset.train.next_batch(100)
            sess.run(self.train_step,
                     feed_dict={
                         'x:0': batch,
                         'phase:0': True
                     })
            progbar(i, iterep)
            if (i + 1) % iterep == 0:
                a, b = sess.run(
                    [self.nent, self.loss],
                    feed_dict={
                        'x:0':
                        dataset.train.data[np.random.choice(
                            len(dataset.train.data), 200)],
                        'phase:0':
                        False
                    })
                c, d = sess.run([self.nent, self.loss],
                                feed_dict={
                                    'x:0': dataset.test.data,
                                    'phase:0': False
                                })
                a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
                e = (0, test_acc(dataset, sess, self.qy_logit))[is_labeled]
                string = (
                    '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                        'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc',
                        'epoch'))
                stream_print(f, string, i <= iterep)
                string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                          .format(a, b, c, d, e, int((i + 1) / iterep)))
                stream_print(f, string)
                qy = sess.run(self.qy,
                              feed_dict={
                                  'x:0': dataset.test.data,
                                  'phase:0': False
                              })
                print('Sample of qy')
                print(qy[:5])

                history['iters'].append(int((i + 1) / iterep))
                history['ent'].append(a)
                history['val_ent'].append(c)
                history['loss'].append(b)
                history['val_loss'].append(d)
                history['val_acc'].append(e)

            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0 and save_parameters:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
        if f is not None: f.close()

        return history
Example #27
    }
    param_set_2 = {
        'sr': 44100,
        'window_size': 50,
        'hop_length': 2205,
        'server_subpaths': 'All'
    }
    param_set_3 = {
        'sr': 44100,
        'window_size': 50,
        'hop_length': 2205,
        'server_subpaths': 'FINAL_East African Popular Music Archive'
    }
    # Hop length: at 22050 Hz, 512 samples ~= 23 ms; at 44100 Hz, use 2205 samples for a ~50 ms hop

    save_params(preproc_path, **param_set_1)
    save_params(preproc_path, **param_set_2)
    save_params(preproc_path, **param_set_3)

    # ----- Feature extraction ----- #
    user_confirmation()

    # Define possible parameters for feature extraction
    for method in ['mfcc', 'spectrogram']:
        for duration in [5, 10]:
            save_params(feature_ext_path, method=method, duration=duration)

    # Define a set of preprocessing parameters and a set of feature extraction parameters to use
    preproc_params = 3
    feature_ext_params = 1
    params_list = [preproc_params, feature_ext_params]
Example #28
def main(n_hid=256, lstm_layers=2, num_epochs=100,
        batch_size=32, save_to='output', max_per_epoch=-1):

    # load current set of words used
    words = open(c.words_used_file, 'r').readlines()
    idx_to_words = dict((i+1,w.strip()) for i,w in enumerate(words))
    idx_to_words[0] = '<e>'
    word_dim=len(words)+1

    # normalization expected by vgg-net
    mean_values = np.array([104, 117, 123]).reshape((3,1,1)).astype(theano.config.floatX)

    # build function for extracting convolutional features
    img_var = T.tensor4('images')
    net = m.build_vgg(shape=(c.img_size, c.img_size), input_var=img_var)
    values = pickle.load(open(c.vgg_weights))['param values']
    nn.layers.set_all_param_values(net['pool5'], values)
    conv_feats = theano.function([img_var], nn.layers.get_output(net['pool5']))
    conv_shape = nn.layers.get_output_shape(net['pool5'])

    # helper function for converting word vector to one-hot
    raw_word_var = T.matrix('seq_raw')
    one_hot = theano.function([raw_word_var], nn.utils.one_hot(raw_word_var, m=word_dim))

    # build expressions for lstm
    conv_feats_var = T.tensor4('conv')
    seq_var = T.tensor3('seq')
    lstm = m.build_rnn(conv_feats_var, seq_var, conv_shape, word_dim, n_hid, lstm_layers)
    output = nn.layers.get_output(lstm['output'])
    output_det = nn.layers.get_output(lstm['output'], deterministic=True)
    loss = m.categorical_crossentropy_logdomain(output, seq_var).mean()
    te_loss = m.categorical_crossentropy_logdomain(output_det, seq_var).mean()

    # compile training functions
    params = nn.layers.get_all_params(lstm['output'], trainable=True)
    lr = theano.shared(nn.utils.floatX(1e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([conv_feats_var, seq_var], loss, updates=updates)
    test_fn = theano.function([conv_feats_var, seq_var], te_loss)
    predict_fn = theano.function([conv_feats_var, seq_var], T.exp(output_det[:,-1:]))

    zeros = np.zeros((batch_size, 1, word_dim), dtype=theano.config.floatX)
    def transform_data(imb):
        y,x = imb
        # data augmentation: flip = -1 if we flip over the y-axis, 1 if not
        flip = -2*np.random.binomial(1, p=0.5) + 1
        # this vgg-net expects image values that are normalized by mean but not magnitude
        x = (u.raw_to_floatX(x[:,:,::flip], pixel_shift=0.)\
                .transpose(0,1,3,2)[:,::-1] * 255. - mean_values)
        return conv_feats(x), np.concatenate([zeros, one_hot(y)], axis=1)

    data = u.DataH5PyStreamer(c.twimg_hdf5_file, batch_size=batch_size)

    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn,
                      max_per_epoch=max_per_epoch,
                      tr_transform=transform_data,
                      te_transform=transform_data)
    np.savetxt('lstm_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(lstm['output'], os.path.join(save_to,
        'lstm_{}.npz'.format(np.asarray(hist)[-1, -1])))


    # generate some example captions for one batch of images
    streamer = data.streamer(training=False, shuffled=True)
    y_raw, x_raw = next(streamer.get_epoch_iterator())
    x, _ = transform_data((y_raw, x_raw))

    y = zeros
    captions = []
    for idx in xrange(y.shape[0]):
        captions.append([])
    idx_to_words[0] = '<e>'
    for sample_num in xrange(c.max_caption_len):
        pred = predict_fn(x, y)
        new_y = []
        for idx in xrange(pred.shape[0]):
            # reduce size by a small factor to prevent numerical imprecision from
            # making it sum to > 1.
            # reverse it so that <e> gets the additional probability, not a word
            sample = np.random.multinomial(1, pred[idx,0,::-1]*.999999)[::-1]
            captions[idx].append(idx_to_words[np.argmax(sample)])
            new_y.append(sample)
        new_y = np.vstack(new_y).reshape(-1,1,word_dim).astype(theano.config.floatX)
        y = np.concatenate([y, new_y], axis=1)
    captions = ['{},{}\n'.format(i, ' '.join(cap)) for i,cap in enumerate(captions)]
    with open(os.path.join(save_to, 'captions_sample.csv'), 'w') as wr:
        wr.writelines(captions)

    for idx in xrange(x_raw.shape[0]):
        Image.fromarray(x_raw[idx].transpose(2,1,0)).save(os.path.join(save_to,
            'ex_{}.jpg'.format(idx)))
Example #29
def main(data_file = '', num_epochs=10, batch_size = 128, L=2, z_dim=256,
        n_hid=1500, binary='false', img_size = 64, init_from = '', save_to='params',
        split_layer='conv7', pxsh = 0.5, specstr = c.pf_cae_specstr,
        cae_weights=c.pf_cae_params, deconv_weights = c.pf_deconv_params):
    binary = binary.lower() == 'true'

    # pre-trained function for extracting convolutional features from images
    cae = m.build_cae(input_var=None, specstr=specstr, shape=(img_size,img_size))
    laydict = dict((l.name, l) for l in nn.layers.get_all_layers(cae))
    convshape = nn.layers.get_output_shape(laydict[split_layer])
    convs_from_img, _ = m.encoder_decoder(cae_weights, specstr=specstr, layersplit=split_layer,
            shape=(img_size, img_size))
    # pre-trained function for mapping convolutional features back to images
    img_from_convs = m.deconvoluter(deconv_weights, specstr=specstr, shape=convshape)

    # Create VAE model
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    input_var = T.tensor4('inputs')
    c,w,h = convshape[1], convshape[2], convshape[3]
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid,
                   shape=(w,h), channels=c)

    if len(init_from) > 0:
        print("loading from {}".format(init_from))
        u.load_params(l_x, init_from)
    
    # build loss, updates, training, prediction functions
    loss,_ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
            binary=binary, L=L)

    lr = theano.shared(nn.utils.floatX(1e-5))
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return convs_from_img(floatx_ims)

    print("training for {} epochs".format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=val_fn,
                             tr_transform=lambda x: data_transform(x[0], do_center=False),
                             te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_feats = data_transform(imb, do_center=True)
    reconstructed_feats = ae_fn(orig_feats).reshape(orig_feats.shape)
    orig_feats_deconv = img_from_convs(orig_feats)
    reconstructed_feats_deconv = img_from_convs(reconstructed_feats)
    for i in range(reconstructed_feats_deconv.shape[0]):
        u.get_image_pair(orig_feats_deconv, reconstructed_feats_deconv, index=i, shift=pxsh)\
                .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vae_convs_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vae_convs_{}.npz'.format(hist[-1,-1])))
Example #30
def main(data_name, vae_type, fea_layer, n_iter, batch_size, K, checkpoint,
         data_path):
    # load data
    from import_data_cifar10 import load_data_cifar10
    X_train, X_test, Y_train, Y_test = load_data_cifar10(data_path, conv=True)
    dimY = Y_train.shape[1]

    if vae_type == 'E':
        from mlp_generator_cifar10_E import generator
    if vae_type == 'F':
        from mlp_generator_cifar10_F import generator
    if vae_type == 'G':
        from mlp_generator_cifar10_G import generator
    from mlp_encoder_cifar10 import encoder_gaussian as encoder

    #first build the feature extractor
    input_shape = X_train[0].shape
    sys.path.append('test_attacks/load/')
    from vgg_cifar10 import cifar10vgg
    cnn = cifar10vgg(path='test_attacks/load/vgg_model/', train=False)

    if fea_layer == 'low':
        N_layer = 16
    if fea_layer == 'mid':
        N_layer = 36
    if fea_layer == 'high':
        N_layer = len(cnn.model.layers) - 5
    for layer in cnn.model.layers:
        print(layer.__class__.__name__)

    def feature_extractor(x):
        out = cnn.normalize_production(x * 255.0)
        for i in range(N_layer):
            out = cnn.model.layers[i](out)
        return out
    print(fea_layer, N_layer, cnn.model.layers[N_layer-1].__class__.__name__, \
          cnn.model.layers[N_layer-1].get_config())

    # then define model
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, ) + input_shape)
    Y_ph = tf.placeholder(tf.float32, shape=(batch_size, dimY))
    dimZ = 128  #32
    dimH = 1000
    fea_op = feature_extractor(X_ph)
    if len(fea_op.get_shape().as_list()) == 4:
        fea_op = tf.reshape(fea_op, [batch_size, -1])
    dimF = fea_op.get_shape().as_list()[-1]
    dec = generator(dimF, dimH, dimZ, dimY, 'linear', 'gen')
    n_layers_enc = 2
    enc = encoder(dimF, dimH, dimZ, dimY, n_layers_enc, 'enc')

    ll = 'l2'
    identity = lambda x: x
    fea_ph = tf.placeholder(tf.float32, shape=(batch_size, dimF))
    fit, eval_acc = construct_optimizer(fea_ph, Y_ph, [identity, enc], dec, ll,
                                        K, vae_type)

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    path_name = data_name + '_conv_vae_fea_%s_%s/' % (vae_type, fea_layer)
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print('create path save/' + path_name)
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print('training from scratch')
        init_variables(sess)
    else:
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # set test phase
    import keras.backend
    keras.backend.set_session(sess)
    cnnfile = 'test_attacks/load/vgg_model/cifar10vgg.h5'
    cnn.model.load_weights(cnnfile)
    print('load weight from', cnnfile)
    keras.backend.set_learning_phase(0)

    # extract features
    def gen_feature(X):
        F = []
        for i in range(int(X.shape[0] / batch_size)):
            batch = X[i * batch_size:(i + 1) * batch_size]
            F.append(sess.run(fea_op, feed_dict={X_ph: batch}))
        return np.concatenate(F, axis=0)

    F_train = gen_feature(X_train)
    F_test = gen_feature(X_test)

    # now start fitting
    beta = 1.0
    n_iter_ = 10
    for i in range(int(n_iter / n_iter_)):
        fit(sess, F_train, Y_train, n_iter_, lr, beta)
        # print training and test accuracy
        eval_acc(sess, F_test, Y_test, 'test', beta)

    # save param values
    save_params(sess, filename, checkpoint, scope='vae')
    checkpoint += 1
Example #31
def main(data_file='',
         img_size=64,
         num_epochs=10,
         batch_size=128,
         pxsh=0.5,
         split_layer='conv7',
         specstr=c.pf_cae_specstr,
         cae_params=c.pf_cae_params,
         save_to='params'):

    # transform function to go from images -> conv feats
    conv_feats, _ = m.encoder_decoder(cae_params,
                                      specstr=specstr,
                                      layersplit=split_layer,
                                      shape=(img_size, img_size))

    # build pretrained net for images -> convfeats in order to get the input shape
    # for the reverse function
    print('compiling functions')
    conv_net = m.build_cae(input_var=None,
                           specstr=specstr,
                           shape=(img_size, img_size))
    cae_layer_dict = dict(
        (l.name, l) for l in nn.layers.get_all_layers(conv_net))
    shape = nn.layers.get_output_shape(cae_layer_dict[split_layer])

    # build net for convfeats -> images
    imgs_var = T.tensor4('images')
    convs_var = T.tensor4('conv_features')
    deconv_net = m.build_deconv_net(input_var=convs_var,
                                    shape=shape,
                                    specstr=specstr)
    loss = nn.objectives.squared_error(
        imgs_var, nn.layers.get_output(deconv_net)).mean()
    te_loss = nn.objectives.squared_error(
        imgs_var, nn.layers.get_output(deconv_net, deterministic=True)).mean()
    params = nn.layers.get_all_params(deconv_net, trainable=True)
    lr = theano.shared(nn.utils.floatX(3e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)

    # compile functions
    train_fn = theano.function([convs_var, imgs_var], loss, updates=updates)
    val_fn = theano.function([convs_var, imgs_var], te_loss)
    deconv_fn = theano.function([convs_var],
                                nn.layers.get_output(deconv_net,
                                                     deterministic=True))

    # run training loop
    print("training for {} epochs".format(num_epochs))

    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x,
                                     pixel_shift=pxsh,
                                     square=True,
                                     center=do_center)
        return (conv_feats(floatx_ims), floatx_ims)

    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(
        data,
        num_epochs=num_epochs,
        train_fn=train_fn,
        test_fn=val_fn,
        tr_transform=lambda x: data_transform(x[0], do_center=False),
        te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history and params
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    imb = data_transform(imb, True)[0]
    result = deconv_fn(imb)
    for i in range(result.shape[0]):
        Image.fromarray(u.get_picture_array(result, index=i, shift=pxsh)) \
                .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('deconv_train_hist.csv',
               np.asarray(hist),
               delimiter=',',
               fmt='%.5f')
    u.save_params(deconv_net,
                  os.path.join(save_to, 'deconv_{}.npz'.format(hist[-1, -1])))
Example #32
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')

    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       100,
                                       relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')] +
            train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)
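    # e.g. if word_dict contains '@entity0' and '@entity3' and the answers in
    # train_examples[2] add '@entity7', entity_markers might become
    # ['<unk_entity>', '@entity0', '@entity3', '@entity7'] (set order is arbitrary),
    # giving contiguous label ids with id 0 reserved for '<unk_entity>'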

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')
    if args.prepare_model:
        return train_fn, test_fn, params

    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict,
                                                   entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc

    if args.test_only:
        return

    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(
        train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_l, train_y,
                             args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l,
                  mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' %
                         (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info(
                'Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)'
                % (epoch, idx, len(all_train), train_loss,
                   time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
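                # sample a training subset of roughly dev-set size (without
                # replacement) so train and dev accuracy are estimated on
                # comparably sized samples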
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' %
                             eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file,
                                      params,
                                      epoch=epoch,
                                      n_updates=n_updates)
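
# A minimal, self-contained sketch (numpy only) of the pattern used in main() above:
# train, evaluate every eval_iter updates, and checkpoint only when dev accuracy
# improves. train_step and evaluate are hypothetical stand-ins, not functions from
# the code above.
import numpy as np

def _sketch_keep_best_checkpoint(num_updates=100, eval_iter=10, seed=0):
    rng = np.random.RandomState(seed)

    def train_step():            # hypothetical stand-in for one SGD update
        return rng.rand()

    def evaluate():              # hypothetical stand-in for eval_acc on the dev set
        return 100 * rng.rand()

    best_acc, saved = 0.0, []
    for n in range(1, num_updates + 1):
        train_step()
        if n % eval_iter == 0:
            dev_acc = evaluate()
            if dev_acc > best_acc:
                best_acc = dev_acc
                saved.append((n, best_acc))   # utils.save_params would be called here
    return saved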
예제 #33
0
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if not (args.test_only):
        logging.info('*' * 10 + ' All')
        all_examples = utils.load_data(args.all_file,
                                       100,
                                       relabeling=args.relabeling)
        dev_ratio = args.dev_ratio
        sample_index = np.arange(len(all_examples[0]))
        random.seed(1000)
        dev_index = random.sample(sample_index,
                                  int(dev_ratio * len(sample_index)))
        train_index = np.setdiff1d(sample_index, dev_index)
        dev_examples = tuple_part(all_examples, dev_index)
        train_examples = tuple_part(all_examples, train_index)
        # feature preprocessing
        train_fea_flat_np = FeaExtract(train_examples[-1])
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        train_fea_flat_np2, dev_fea_flat_np2 = Prepocessing_func(
            train_fea_flat_np,
            dev_fea_flat_np,
            varian_ratio_tol=args.pca_ratio)
        train_fea_merge = FeaMerge(train_fea_flat_np2, train_examples[-1])
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        train_examples = train_examples[:-1] + (train_fea_merge, )
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )
        args.num_train = len(train_examples[0])
    else:
        #        logging.info('*' * 10 + ' Train')
        #        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling)
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        dev_fea_flat_np2 = PrepocessingApply_func(dev_fea_flat_np)
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )

    args.num_dev = len(dev_examples[0])
    args.mea_num = dev_examples[4][0].shape[-1]

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y, dev_x4 = utils.vectorize(
        dev_examples,
        word_dict,
        sort_by_len=not args.test_only,
        concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, dev_x4,
                           args.batch_size, args.concat)
    dev_acc, prediction = eval_acc(test_fn, all_dev)

    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())

    best_dev_acc = dev_acc
    best_train_acc = 0
    if args.test_only:
        return dev_acc, best_train_acc
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)
    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y, train_x4 = utils.vectorize(
        train_examples, word_dict, concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_x3, train_y, train_x4,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3, mb_y,
                  mb_x4, mb_mask4) in enumerate(all_train):

            train_loss = train_fn(mb_x1, mb_mask1, mb_x3, mb_mask3, mb_y,
                                  mb_x4)
            #            if idx % 100 == 0:
            #            if epoch % 100 == 0:
            #                logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            #                logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' % (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                print([x.get_value() for x in params])
                print([x.get_value() for x in all_params])
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k] for k in samples],
                    [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples for o in range(4)],
                    [train_y[k] for k in samples],
                    [train_x4[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                train_acc, pred = eval_acc(test_fn, all_train)
                logging.info('train accuracy: %.2f %%' % train_acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, best_dev_acc))
                    best_train_acc = acc
                    logging.info(
                        'Best train accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, best_train_acc))
                    utils.save_params(
                        args.model_file,
                        all_params,
                        epoch=epoch,
                        n_updates=n_updates,
                    )

    return best_dev_acc, best_train_acc
예제 #34
0
def main(specstr=default_specstr,
         z_dim=256,
         num_epochs=10,
         ch=3,
         init_from='',
         img_size=64,
         pxsh=0.5,
         data_file='',
         batch_size=8,
         save_to='params'):

    # build expressions for the output, loss, gradient
    input_var = T.tensor4('inputs')
    print('building specstr {} - zdim {}'.format(specstr, z_dim))
    cae = m.build_cae_nopoolinv(input_var,
                                shape=(img_size, img_size),
                                channels=ch,
                                specstr=specstr.format(z_dim))
    l_list = nn.layers.get_all_layers(cae)
    pred = nn.layers.get_output(cae)
    loss = nn.objectives.squared_error(pred, input_var.flatten(2)).mean()
    params = nn.layers.get_all_params(cae, trainable=True)
    grads = nn.updates.total_norm_constraint(T.grad(loss, params), 10)
    updates = nn.updates.adam(grads, params, learning_rate=1e-3)
    te_pred = nn.layers.get_output(cae, deterministic=True)
    te_loss = nn.objectives.squared_error(te_pred, input_var.flatten(2)).mean()

    # training functions
    print('compiling functions')
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], te_loss)

    # compile functions for encode/decode to test later
    enc_layer = next(l for l in l_list if l.name == 'encode')
    enc_fn = theano.function([input_var],
                             nn.layers.get_output(enc_layer,
                                                  deterministic=True))
    dec_fn = lambda z: nn.layers.get_output(
        cae,
        deterministic=True,
        inputs={
            l_list[0]:
            np.zeros((z.shape[0], ch, img_size, img_size),
                     dtype=theano.config.floatX),
            enc_layer:
            z
        }).eval().reshape(-1, ch, img_size, img_size)

    # load params if requested, run training
    if len(init_from) > 0:
        print('loading params from {}'.format(init_from))
        load_params(cae, init_from)
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    print('training for {} epochs'.format(num_epochs))
    hist = u.train_with_hdf5(data,
                             num_epochs=num_epochs,
                             train_fn=train_fn,
                             test_fn=val_fn,
                             tr_transform=lambda x: u.raw_to_floatX(
                                 x[0], pixel_shift=pxsh, center=False),
                             te_transform=lambda x: u.raw_to_floatX(
                                 x[0], pixel_shift=pxsh, center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    tg = u.raw_to_floatX(imb, pixel_shift=pxsh, square=True, center=True)
    pr = dec_fn(enc_fn(tg))
    for i in range(pr.shape[0]):
        u.get_image_pair(tg, pr, index=i,
                         shift=pxsh).save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('cae_train_hist.csv',
               np.asarray(hist),
               delimiter=',',
               fmt='%.5f')
    u.save_params(cae, os.path.join(save_to, 'cae_{}.npz'.format(hist[-1,
                                                                      -1])))
예제 #35
0
                               weight_decay=optim_params["weight_decay"])
    elif "SGD" == optim_params["name"]:
        optimizer = optim.SGD(net.get_params_lr(
            lr_not_pretrained=optim_params["lr_not_pretrained"],
            lr_pretrained=optim_params["lr_pretrained"]),
                              momentum=optim_params["momentum"],
                              weight_decay=optim_params["weight_decay"])

    # training
    train_net(net,
              train_loader,
              test_loader,
              optimizer=optimizer,
              loss_fn=loss_fn,
              epochs=params["epochs"],
              device=device)
    # inference
    y, ypred = eval_net(net, test_loader, probability=True, device=device)

    # append targets/predictions (for computing accuracy) and the network weights to the lists
    ys.append(y.cpu().numpy())
    ypreds.append(ypred.cpu().numpy())
    recall = recall_score(
        ys[-1], ypreds[-1].argmax(1), average=None, zero_division=0) * 100
    print("テストの各クラスrecall:\n{}\n平均:{}".format(
        np.round(recall, decimals=1), np.round(recall.mean(), decimals=1)))
    net_weights.append(net.cpu().state_dict())

utils.print_result(params, ys, ypreds)
utils.save_params(params, net_weights)
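
# A tiny illustration (assuming scikit-learn >= 0.22, which provides the zero_division
# argument used above): with average=None, recall_score returns one recall value per
# class, in sorted label order.
from sklearn.metrics import recall_score
print(recall_score([0, 0, 1, 2], [0, 1, 1, 2], average=None, zero_division=0))
# -> [0.5 1.  1. ]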
예제 #36
0
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')

    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev, relabeling=args.relabeling)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(set([w for w in word_dict.keys()
                              if w.startswith('@entity')] + train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc

    if args.test_only:
        return

    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_l, train_y, args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
                         (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(np.random.choice(args.num_train, min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' % eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                                 % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params, epoch=epoch, n_updates=n_updates)
예제 #37
0
def main(data_name, method, dimZ, dimH, n_channel, batch_size, K_mc,
         checkpoint, lbd):
    # set up dataset specific stuff
    from config import config
    labels, n_iter, dimX, shape_high, ll = config(data_name, n_channel)

    if data_name == 'mnist':
        from mnist import load_mnist

    if data_name == 'notmnist':
        from notmnist import load_notmnist

    # import functionalities
    if method == 'onlinevi':
        from bayesian_generator import generator_head, generator_shared, \
            generator, construct_gen
        from onlinevi import construct_optimizer, init_shared_prior, \
            update_shared_prior, update_q_sigma
    if method in ['ewc', 'noreg', 'laplace', 'si']:
        from generator import generator_head, generator_shared, generator, construct_gen
        if method in ['ewc', 'noreg']:
            from vae_ewc import construct_optimizer, lowerbound
        if method == 'ewc': from vae_ewc import update_ewc_loss, compute_fisher
        if method == 'laplace':
            from vae_laplace import construct_optimizer, lowerbound
            from vae_laplace import update_laplace_loss, compute_fisher, init_fisher_accum
        if method == 'si':
            from vae_si import construct_optimizer, lowerbound, update_si_reg

    # then define model
    n_layers_shared = 2
    batch_size_ph = tf.placeholder(tf.int32, shape=(), name='batch_size')
    dec_shared = generator_shared(dimX, dimH, n_layers_shared, 'sigmoid',
                                  'gen')

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    string = method
    if method in ['ewc', 'laplace', 'si']:
        string = string + '_lbd%.1f' % lbd
    if method == 'onlinevi' and K_mc > 1:
        string = string + '_K%d' % K_mc
    path_name = data_name + '_%s/' % string
    if not os.path.isdir('save/'):
        os.mkdir('save/')
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print 'create path save/' + path_name
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print 'training from scratch'
        old_var_list = init_variables(sess)
    else:
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # visualise the samples
    N_gen = 10**2
    path = 'figs/' + path_name
    if not os.path.isdir('figs/'):
        os.mkdir('figs/')
    if not os.path.isdir(path):
        os.mkdir(path)
        print 'create path ' + path
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, dimX), name='x_ph')

    # now start fitting
    N_task = len(labels)
    gen_ops = []
    X_valid_list = []
    X_test_list = []
    eval_func_list = []
    result_list = []
    if method == 'onlinevi':
        shared_prior_params = init_shared_prior()
    if method in ['ewc', 'noreg']:
        ewc_loss = 0.0
    if method == 'laplace':
        F_accum = init_fisher_accum()
        laplace_loss = 0.0
    if method == 'si':
        old_params_shared = None
        si_reg = None
    n_layers_head = 2
    n_layers_enc = n_layers_shared + n_layers_head - 1
    for task in xrange(1, N_task + 1):
        # first load data
        if data_name == 'mnist':
            X_train, X_test, _, _ = load_mnist(digits=labels[task - 1],
                                               conv=False)
        if data_name == 'notmnist':
            X_train, X_test, _, _ = load_notmnist(data_path,
                                                  digits=labels[task - 1],
                                                  conv=False)
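        # hold out the last 10% of this task's training data as a validation set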
        N_train = int(X_train.shape[0] * 0.9)
        X_valid_list.append(X_train[N_train:])
        X_train = X_train[:N_train]
        X_test_list.append(X_test)

        # define the head net and the generator ops
        dec = generator(
            generator_head(dimZ, dimH, n_layers_head, 'gen_%d' % task),
            dec_shared)
        enc = encoder(dimX, dimH, dimZ, n_layers_enc, 'enc_%d' % task)
        gen_ops.append(construct_gen(dec, dimZ, sampling=False)(N_gen))
        print 'construct eval function...'
        eval_func_list.append(construct_eval_func(X_ph, enc, dec, ll, \
                                                  batch_size_ph, K=100, sample_W=False))

        # then construct loss func and fit func
        print 'construct fit function...'
        if method == 'onlinevi':
            fit = construct_optimizer(X_ph, enc, dec, ll, X_train.shape[0], batch_size_ph, \
                                      shared_prior_params, task, K_mc)
        if method in ['ewc', 'noreg']:
            bound = lowerbound(X_ph, enc, dec, ll)
            fit = construct_optimizer(X_ph, batch_size_ph, bound,
                                      X_train.shape[0], ewc_loss)
            if method == 'ewc':
                fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound,
                                                  X_train.shape[0])

        if method == 'laplace':
            bound = lowerbound(X_ph, enc, dec, ll)
            fit = construct_optimizer(X_ph, batch_size_ph, bound,
                                      X_train.shape[0], laplace_loss)
            fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound,
                                              X_train.shape[0])

        if method == 'si':
            bound = lowerbound(X_ph, enc, dec, ll)
            fit, shared_var_list = construct_optimizer(X_ph, batch_size_ph,
                                                       bound, X_train.shape[0],
                                                       si_reg,
                                                       old_params_shared, lbd)
            if old_params_shared is None:
                old_params_shared = sess.run(shared_var_list)

        # initialise all the uninitialised stuff
        old_var_list = init_variables(sess, old_var_list)

        # start training for each task
        if method == 'si':
            new_params_shared, w_params_shared = fit(sess, X_train, n_iter, lr)
        else:
            fit(sess, X_train, n_iter, lr)

        # plot samples
        x_gen_list = sess.run(gen_ops, feed_dict={batch_size_ph: N_gen})
        for i in xrange(len(x_gen_list)):
            plot_images(x_gen_list[i], shape_high, path, \
                        data_name + '_gen_task%d_%d' % (task, i + 1))

        x_list = [x_gen_list[i][:1] for i in xrange(len(x_gen_list))]
        x_list = np.concatenate(x_list, 0)
        tmp = np.zeros([10, dimX])
        tmp[:task] = x_list
        if task == 1:
            x_gen_all = tmp
        else:
            x_gen_all = np.concatenate([x_gen_all, tmp], 0)

        # print test-ll on all tasks
        tmp_list = []
        for i in xrange(len(eval_func_list)):
            print 'task %d' % (i + 1),
            test_ll = eval_func_list[i](sess, X_valid_list[i])
            tmp_list.append(test_ll)
        result_list.append(tmp_list)

        # save param values
        save_params(sess, filename, checkpoint)
        checkpoint += 1

        # update regularisers/priors
        if method == 'ewc':
            # update EWC loss
            print 'update ewc loss...'
            X_batch = X_train[np.random.permutation(range(
                X_train.shape[0]))[:batch_size]]
            ewc_loss = update_ewc_loss(sess, ewc_loss, var_list, fisher, lbd,
                                       X_batch)
        if method == 'laplace':
            # update Laplace loss
            print 'update laplace loss...'
            X_batch = X_train[np.random.permutation(range(
                X_train.shape[0]))[:batch_size]]
            laplace_loss, F_accum = update_laplace_loss(
                sess, F_accum, var_list, fisher, lbd, X_batch)
        if method == 'onlinevi':
            # update prior
            print 'update prior...'
            shared_prior_params = update_shared_prior(sess,
                                                      shared_prior_params)
            # reset the variance of q
            update_q_sigma(sess)

        if method == 'si':
            # update regularisers/priors
            print 'update SI big omega matrices...'
            si_reg, _ = update_si_reg(sess, si_reg, new_params_shared,
                                      old_params_shared, w_params_shared)
            old_params_shared = new_params_shared

    plot_images(x_gen_all, shape_high, path, data_name + '_gen_all')

    for i in xrange(len(result_list)):
        print result_list[i]

    # save results
    fname = 'results/' + data_name + '_%s.pkl' % string
    import pickle
    pickle.dump(result_list, open(fname, 'wb'))
    print 'test-ll results saved in', fname
예제 #38
0
def main(save_to='params',
         dataset='mm',
         kl_loss='true',  # use kl-div in z-space instead of mse
         diffs='false',
         seq_length=30,
         num_epochs=1,
         lstm_n_hid=1024,
         max_per_epoch=-1):
    kl_loss = kl_loss.lower() == 'true'
    diffs = diffs.lower() == 'true'

    # set up functions for data pre-processing and model training
    input_var = T.tensor4('inputs')

    # different experimental setup for moving mnist vs pulp fiction datasets
    if dataset == 'pf':
        img_size = 64
        cae_weights = c.pf_cae_params
        cae_specstr = c.pf_cae_specstr
        split_layer = 'conv7'
        inpvar = T.tensor4('input')
        net = m.build_cae(inpvar, specstr=cae_specstr, shape=(img_size, img_size))
        convs_from_img,_ = m.encoder_decoder(cae_weights, specstr=cae_specstr,
                layersplit=split_layer, shape=(img_size, img_size), poolinv=True)
        laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net))
        zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer])
        deconv_weights = c.pf_deconv_params
        vae_weights = c.pf_vae_params
        img_from_convs = m.deconvoluter(deconv_weights, specstr=cae_specstr, shape=zdec_in_shape)
        L=2
        vae_n_hid = 1500
        binary = False
        z_dim = 256
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
               m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=vae_n_hid,
                        shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1])
        u.load_params(l_x, vae_weights)
        datafile = 'data/pf.hdf5'
        frame_skip=3 # every 3rd frame in sequence
        z_decode_layer = l_x_mu_list[0]
        pixel_shift = 0.5
        samples_per_image = 4
        tr_batch_size = 16 # must be a multiple of samples_per_image
    elif dataset == 'mm':
        img_size = 64
        cvae_weights = c.mm_cvae_params
        L=2
        vae_n_hid = 1024
        binary = True
        z_dim = 32
        zdec_in_shape = (None, 1, img_size, img_size)
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid, binary=binary,
                       shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1])
        u.load_params(l_x, cvae_weights)
        datafile = 'data/moving_mnist.hdf5'
        frame_skip=1
        w, h = img_size, img_size  # width/height of raw input image in the hdf5 file
        z_decode_layer = l_x_list[0]
        pixel_shift = 0
        samples_per_image = 1
        tr_batch_size = 128 # must be a multiple of samples_per_image

    # functions for moving to/from image or conv-space, and z-space
    z_mat = T.matrix('z')
    zenc = theano.function([input_var], nn.layers.get_output(l_z_mu, deterministic=True))
    zdec = theano.function([z_mat], nn.layers.get_output(z_decode_layer, {l_z_mu:z_mat},
        deterministic=True).reshape((-1, zdec_in_shape[1]) + zdec_in_shape[2:]))
    zenc_ls = theano.function([input_var], nn.layers.get_output(l_z_ls, deterministic=True))

    # functions for encoding sequences of z's
    print 'compiling functions'
    z_var = T.tensor3('z_in')
    z_ls_var = T.tensor3('z_ls_in')
    tgt_mu_var = T.tensor3('z_tgt')
    tgt_ls_var = T.tensor3('z_ls_tgt')
    learning_rate = theano.shared(nn.utils.floatX(1e-4))

    # build separate training functions depending on whether we use MSE (predicting
    # only z) or KL divergence (predicting both the mean and sigma of z)
    if kl_loss:
        def kl(p_mu, p_sigma, q_mu, q_sigma):
            return 0.5 * T.sum(T.sqr(p_sigma)/T.sqr(q_sigma) + T.sqr(q_mu - p_mu)/T.sqr(q_sigma)
                               - 1 + 2*T.log(q_sigma) - 2*T.log(p_sigma))
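        # kl() is the closed-form KL divergence KL(p || q) between diagonal Gaussians,
        # summed over batch entries and dimensions:
        #   sum( log(q_sigma/p_sigma) + (p_sigma^2 + (p_mu - q_mu)^2) / (2*q_sigma^2) - 1/2 )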
        lstm, _ = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        z_mu_expr, z_ls_expr = nn.layers.get_output([lstm['output_mu'], lstm['output_ls']])
        z_mu_expr_det, z_ls_expr_det = nn.layers.get_output([lstm['output_mu'],
            lstm['output_ls']], deterministic=True)
        loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr))
        te_loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr_det, T.exp(z_ls_expr_det))
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], loss, 
                updates=updates)
        test_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], te_loss)
    else:
        lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output']),
                tgt_mu_var).mean()
        te_loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output'],
            deterministic=True), tgt_mu_var).mean()
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates)
        test_fn = theano.function([z_var, tgt_mu_var], te_loss)

    if dataset == 'pf':
        z_from_img = lambda x: zenc(convs_from_img(x))
        z_ls_from_img = lambda x: zenc_ls(convs_from_img(x))
        img_from_z = lambda z: img_from_convs(zdec(z))
    elif dataset == 'mm':
        z_from_img = zenc
        z_ls_from_img = zenc_ls
        img_from_z = zdec

    # training loop
    print('training for {} epochs'.format(num_epochs))
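    # each HDF5 batch must supply enough raw frames for tr_batch_size sequences of
    # seq_length+1 z-codes after the frame_skip subsampling done in transform_data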
    nbatch = (seq_length+1) * tr_batch_size * frame_skip / samples_per_image
    data = u.DataH5PyStreamer(datafile, batch_size=nbatch)

    # converts raw (non-square) uint8 image arrays into batches of z-space sequences
    def transform_data(ims_batch, center=False):
        imb = u.raw_to_floatX(ims_batch, pixel_shift=pixel_shift,
                center=center)[np.random.randint(frame_skip)::frame_skip]
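        # pick a random offset in [0, frame_skip) and then keep every frame_skip-th
        # frame, so successive minibatches sample different temporal phases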
        zbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX)
        zsigbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX)
        for i in xrange(samples_per_image):
            chunk = tr_batch_size/samples_per_image
            if diffs:
                zf = z_from_img(imb).reshape((chunk, seq_length+1, -1))
                zbatch[i*chunk:(i+1)*chunk, 1:] = zf[:,1:] - zf[:,:-1]
                if kl_loss:
                    zls = z_ls_from_img(imb).reshape((chunk, seq_length+1, -1))
                    zsigbatch[i*chunk:(i+1)*chunk, 1:] = zls[:,1:] - zls[:,:-1]
            else:
                zbatch[i*chunk:(i+1)*chunk] = z_from_img(imb).reshape((chunk, seq_length+1, -1))
                if kl_loss:
                    zsigbatch[i*chunk:(i+1)*chunk] = z_ls_from_img(imb).reshape((chunk,
                        seq_length+1, -1))
        if kl_loss:
            return zbatch[:,:-1,:], zsigbatch[:,:-1,:], zbatch[:,1:,:], zsigbatch[:,1:,:]
        return zbatch[:,:-1,:], zbatch[:,1:,:]

    # we need sequences of images, so we do not shuffle data during training
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn,
                     train_shuffle=False,
                     max_per_epoch=max_per_epoch,
                     tr_transform=lambda x: transform_data(x[0], center=False),
                     te_transform=lambda x: transform_data(x[0], center=True))

    hist = np.asarray(hist)
    u.save_params(lstm['output'], os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1,-1])))

    # build functions to sample from LSTM
    # separate cell_init and hid_init from the other learned model parameters
    all_param_values = nn.layers.get_all_param_values(lstm['output'])
    init_indices = [i for i,p in enumerate(nn.layers.get_all_params(lstm['output']))
            if 'init' in str(p)]
    init_values = [all_param_values[i] for i in init_indices]
    params_noinit = [p for i,p in enumerate(all_param_values) if i not in init_indices]

    # build model without learnable init values, and load non-init parameters
    if kl_loss:
        lstm_sample, state_vars = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid,
                training=False)
    else:
        lstm_sample, state_vars = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=False)
    nn.layers.set_all_param_values(lstm_sample['output'], params_noinit)

    # extract layers representing the hidden and cell states, and have sample_fn
    # return their outputs
    state_layers_keys = [k for k in lstm_sample.keys() if 'hidfinal' in k or 'cellfinal' in k]
    state_layers_keys = sorted(state_layers_keys)
    state_layers_keys = sorted(state_layers_keys, key = lambda x:int(x.split('_')[1]))
    state_layers = [lstm_sample[s] for s in state_layers_keys]
    if kl_loss:
        sample_fn = theano.function([z_var, z_ls_var] + state_vars,
                nn.layers.get_output([lstm['output_mu'], lstm['output_ls']] + state_layers,
                    deterministic=True))
    else:
        sample_fn = theano.function([z_var] + state_vars,
                nn.layers.get_output([lstm['output']] + state_layers, deterministic=True))

    from images2gif import writeGif
    from PIL import Image

    # sample approximately 30 different generated video sequences
    te_stream = data.streamer(training=True, shuffled=False)
    interval = data.ntrain / data.batch_size / 30
    for idx,imb in enumerate(te_stream.get_epoch_iterator()):
        if idx % interval != 0:
            continue
        z_tup = transform_data(imb[0], center=True)
        seg_idx = np.random.randint(z_tup[0].shape[0])
        if kl_loss:
            z_in, z_ls_in = z_tup[0], z_tup[1]
            z_last, z_ls_last = z_in[seg_idx:seg_idx+1], z_ls_in[seg_idx:seg_idx+1]
            z_vars = [z_last, z_ls_last]
        else:
            z_in = z_tup[0]
            z_last = z_in[seg_idx:seg_idx+1]
            z_vars = [z_last]
        images = []
        state_values = [np.dot(np.ones((z_last.shape[0],1), dtype=theano.config.floatX), s)
                for s in init_values]
        output_list = sample_fn(*(z_vars + state_values))

        # use whole sequence of predictions for output
        z_pred = output_list[0]
        state_values = output_list[2 if kl_loss else 1:]

        rec = img_from_z(z_pred.reshape(-1, z_dim))
        for k in xrange(rec.shape[0]):
            images.append(Image.fromarray(u.get_picture_array(rec, index=k, shift=pixel_shift)))
        k += 1
        # slice prediction to feed into lstm
        z_pred = z_pred[:,-1:,:]
        if kl_loss:
            z_ls_pred = output_list[1][:,-1:,:]
            z_vars = [z_pred, z_ls_pred]
        else:
            z_vars = [z_pred]
        for i in xrange(30): # predict 30 frames after the end of the priming video
            output_list = sample_fn(*(z_vars + state_values))
            z_pred = output_list[0]
            state_values = output_list[2 if kl_loss else 1:]
            rec = img_from_z(z_pred.reshape(-1, z_dim))
            images.append(Image.fromarray(u.get_picture_array(rec, index=0, shift=pixel_shift)))
            if kl_loss:
                z_ls_pred = output_list[1]
                z_vars = [z_pred, z_ls_pred]
            else:
                z_vars = [z_pred]
        writeGif("sample_{}.gif".format(idx),images,duration=0.1,dither=0)
예제 #39
0
def main(n_hid=256,
         lstm_layers=2,
         num_epochs=100,
         batch_size=32,
         save_to='output',
         max_per_epoch=-1):

    # load current set of words used
    words = open(c.words_used_file, 'r').readlines()
    idx_to_words = dict((i + 1, w.strip()) for i, w in enumerate(words))
    idx_to_words[0] = '<e>'
    word_dim = len(words) + 1

    # normalization expected by vgg-net
    mean_values = np.array([104, 117, 123]).reshape(
        (3, 1, 1)).astype(theano.config.floatX)

    # build function for extraction convolutional features
    img_var = T.tensor4('images')
    net = m.build_vgg(shape=(c.img_size, c.img_size), input_var=img_var)
    values = pickle.load(open(c.vgg_weights))['param values']
    nn.layers.set_all_param_values(net['pool5'], values)
    conv_feats = theano.function([img_var], nn.layers.get_output(net['pool5']))
    conv_shape = nn.layers.get_output_shape(net['pool5'])

    # helper function for converting word vector to one-hot
    raw_word_var = T.matrix('seq_raw')
    one_hot = theano.function([raw_word_var],
                              nn.utils.one_hot(raw_word_var, m=word_dim))

    # build expressions for lstm
    conv_feats_var = T.tensor4('conv')
    seq_var = T.tensor3('seq')
    lstm = m.build_rnn(conv_feats_var, seq_var, conv_shape, word_dim, n_hid,
                       lstm_layers)
    output = nn.layers.get_output(lstm['output'])
    output_det = nn.layers.get_output(lstm['output'], deterministic=True)
    loss = m.categorical_crossentropy_logdomain(output, seq_var).mean()
    te_loss = m.categorical_crossentropy_logdomain(output_det, seq_var).mean()

    # compile training functions
    params = nn.layers.get_all_params(lstm['output'], trainable=True)
    lr = theano.shared(nn.utils.floatX(1e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([conv_feats_var, seq_var],
                               loss,
                               updates=updates)
    test_fn = theano.function([conv_feats_var, seq_var], te_loss)
    predict_fn = theano.function([conv_feats_var, seq_var],
                                 T.exp(output_det[:, -1:]))

    zeros = np.zeros((batch_size, 1, word_dim), dtype=theano.config.floatX)
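    # all-zero "word" used both as the first input step of every caption sequence
    # (see transform_data below) and as the seed input when sampling captions,
    # effectively a start-of-sequence token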

    def transform_data(imb):
        y, x = imb
        # data augmentation: flip = -1 if we flip over the y-axis, 1 if not
        flip = -2 * np.random.binomial(1, p=0.5) + 1
        # this vgg-net expects image values that are normalized by mean but not magnitude
        x = (u.raw_to_floatX(x[:,:,::flip], pixel_shift=0.)\
                .transpose(0,1,3,2)[:,::-1] * 255. - mean_values)
        return conv_feats(x), np.concatenate([zeros, one_hot(y)], axis=1)

    data = u.DataH5PyStreamer(c.twimg_hdf5_file, batch_size=batch_size)

    hist = u.train_with_hdf5(data,
                             num_epochs=num_epochs,
                             train_fn=train_fn,
                             test_fn=test_fn,
                             max_per_epoch=max_per_epoch,
                             tr_transform=transform_data,
                             te_transform=transform_data)
    np.savetxt('lstm_train_hist.csv',
               np.asarray(hist),
               delimiter=',',
               fmt='%.5f')
    u.save_params(
        lstm['output'],
        os.path.join(save_to, 'lstm_{}.npz'.format(np.asarray(hist)[-1, -1])))

    # generate some example captions for one batch of images
    streamer = data.streamer(training=False, shuffled=True)
    y_raw, x_raw = next(streamer.get_epoch_iterator())
    x, _ = transform_data((y_raw, x_raw))

    y = zeros
    captions = []
    for idx in xrange(y.shape[0]):
        captions.append([])
    idx_to_words[0] = '<e>'
    for sample_num in xrange(c.max_caption_len):
        pred = predict_fn(x, y)
        new_y = []
        for idx in xrange(pred.shape[0]):
            # scale down slightly so numerical imprecision cannot make the
            # probabilities sum to > 1 (which np.random.multinomial would reject);
            # reverse so that <e> receives the leftover probability mass, not a word
            sample = np.random.multinomial(1,
                                           pred[idx, 0, ::-1] * .999999)[::-1]
            captions[idx].append(idx_to_words[np.argmax(sample)])
            new_y.append(sample)
        new_y = np.vstack(new_y).reshape(-1, 1,
                                         word_dim).astype(theano.config.floatX)
        y = np.concatenate([y, new_y], axis=1)
    captions = [
        '{},{}\n'.format(i, ' '.join(cap)) for i, cap in enumerate(captions)
    ]
    with open(os.path.join(save_to, 'captions_sample.csv'), 'w') as wr:
        wr.writelines(captions)

    for idx in xrange(x_raw.shape[0]):
        Image.fromarray(x_raw[idx].transpose(2, 1, 0)).save(
            os.path.join(save_to, 'ex_{}.jpg'.format(idx)))
예제 #40
0
def main(
    data_file="",
    img_size=64,
    num_epochs=10,
    batch_size=128,
    pxsh=0.5,
    split_layer="conv7",
    specstr=c.pf_cae_specstr,
    cae_params=c.pf_cae_params,
    save_to="params",
):

    # transform function to go from images -> conv feats
    conv_feats, _ = m.encoder_decoder(cae_params, specstr=specstr, layersplit=split_layer, shape=(img_size, img_size))

    # build pretrained net for images -> convfeats in order to get the input shape
    # for the reverse function
    print("compiling functions")
    conv_net = m.build_cae(input_var=None, specstr=specstr, shape=(img_size, img_size))
    cae_layer_dict = dict((l.name, l) for l in nn.layers.get_all_layers(conv_net))
    shape = nn.layers.get_output_shape(cae_layer_dict[split_layer])

    # build net for convfeats -> images
    imgs_var = T.tensor4("images")
    convs_var = T.tensor4("conv_features")
    deconv_net = m.build_deconv_net(input_var=convs_var, shape=shape, specstr=specstr)
    loss = nn.objectives.squared_error(imgs_var, nn.layers.get_output(deconv_net)).mean()
    te_loss = nn.objectives.squared_error(imgs_var, nn.layers.get_output(deconv_net, deterministic=True)).mean()
    params = nn.layers.get_all_params(deconv_net, trainable=True)
    lr = theano.shared(nn.utils.floatX(3e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)

    # compile functions
    train_fn = theano.function([convs_var, imgs_var], loss, updates=updates)
    val_fn = theano.function([convs_var, imgs_var], te_loss)
    deconv_fn = theano.function([convs_var], nn.layers.get_output(deconv_net, deterministic=True))

    # run training loop
    print("training for {} epochs".format(num_epochs))

    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return (conv_feats(floatx_ims), floatx_ims)

    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(
        data,
        num_epochs=num_epochs,
        train_fn=train_fn,
        test_fn=val_fn,
        tr_transform=lambda x: data_transform(x[0], do_center=False),
        te_transform=lambda x: data_transform(x[0], do_center=True),
    )

    # generate examples, save training history and params
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    imb = data_transform(imb, True)[0]
    result = deconv_fn(imb)
    for i in range(result.shape[0]):
        Image.fromarray(u.get_picture_array(result, index=i, shift=pxsh)).save("output_{}.jpg".format(i))
    hist = np.asarray(hist)
    np.savetxt("deconv_train_hist.csv", np.asarray(hist), delimiter=",", fmt="%.5f")
    u.save_params(deconv_net, os.path.join(save_to, "deconv_{}.npz".format(hist[-1, -1])))