def evaluate(self, data, ref_alignments, batch_size=4, training=False):
    """Evaluate the model on a data set."""

    ref_align = read_naacl_alignments(ref_alignments)

    ref_iterator = iter(ref_align)
    metric = AERSufficientStatistics()
    accuracy_correct = 0
    accuracy_total = 0
    loss_total = 0
    steps = 0.

    for batch_id, batch in enumerate(iterate_minibatches(data, batch_size=batch_size)):
      x, y = prepare_data(batch, self.x_vocabulary, self.y_vocabulary)
      y_len = np.sum(np.sign(y), axis=1, dtype="int64")

      align, prob, acc_correct, acc_total, loss = self.get_viterbi(x, y, training)
      accuracy_correct += acc_correct
      accuracy_total += acc_total
      loss_total += loss
      steps += 1

      for alignment, N, (sure, probable) in zip(align, y_len, ref_iterator):
        # the evaluation ignores NULL links, so we discard them
        # j is 1-based in the naacl format
        pred = set((aj, j) for j, aj in enumerate(alignment[:N], 1) if aj > 0)
        metric.update(sure=sure, probable=probable, predicted=pred)

    accuracy = accuracy_correct / float(accuracy_total)
    return metric.aer(), accuracy, loss_total/float(steps)
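Every example on this page calls an `iterate_minibatches` helper that is defined elsewhere in each project, and its signature varies (some versions also take masks, character inputs, or a list of arrays). As a reference point, here is a minimal sketch of the common Lasagne-tutorial-style version; treat it as an assumption, not the implementation used by any particular example above or below.

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # yield (inputs, targets) slices of size `batchsize`, optionally in shuffled order
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]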
Example #2
def train_regressor(model,
                    iters=2000,
                    batchsize=100,
                    resample=False,
                    optimizer=None,
                    log_likelihood=gaussian_log_likelihood):
    X = (model.X - model.mx) * model.iSx
    Y = (model.Y - model.my) * model.iSy
    N = X.shape[0]
    M = batchsize

    if optimizer is None:
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params, 1e-3)

    pbar = tqdm.tqdm(enumerate(iterate_minibatches(X, Y, M)), total=iters)

    for i, batch in pbar:
        x, y = batch
        model.zero_grad()
        outs = model(x, normalize=False, resample=resample)
        Enlml = -log_likelihood(y, *outs).mean()
        loss = Enlml + model.regularization_loss() / N
        loss.backward()
        optimizer.step()
        pbar.set_description('log-likelihood of data: %f' % (-Enlml))
        if i == iters:
            pbar.close()
            break
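The default `log_likelihood=gaussian_log_likelihood` above is expected to accept the targets followed by the model outputs (`log_likelihood(y, *outs)`) and return a per-sample log density. Below is a minimal sketch under the assumption that the model predicts a mean and a standard deviation; the project's actual function may differ.

import numpy as np
import torch

def gaussian_log_likelihood(targets, pred_means, pred_stds=None):
    # per-sample Gaussian log density, summed over the output dimensions;
    # a unit standard deviation is assumed when none is predicted
    deltas = pred_means - targets
    stds = pred_stds if pred_stds is not None else torch.ones_like(deltas)
    return -0.5 * (
        (deltas / stds) ** 2 + 2 * torch.log(stds) + np.log(2 * np.pi)
    ).sum(-1)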
Example #3
def predict_label(words, masks, chars, predict_fn, alphabet_label):
    predict_list = []
    for batch in utils.iterate_minibatches(words, masks=masks, char_inputs=chars):
        word_inputs, mask_inputs, char_inputs = batch
        predicts = predict_fn(word_inputs, mask_inputs, char_inputs)
        predict_list += utils.output_predictions(predicts, mask_inputs, alphabet_label)
    return predict_list
Example #4
 def train(self, paths):
     assert self.sess is not None
     obs = numpy.concatenate([path['observations'] for path in paths])
     returns = numpy.concatenate([path['returns'] for path in paths])
     if self.batch_size is not None and obs.shape[0] >= self.batch_size:
         for x, z in iterate_minibatches([obs, returns], self.batch_size, shuffle=True):
             self.sess.run(self.train_op, feed_dict={self.x: x, self.z: z})
     else:
         self.sess.run(self.train_op, feed_dict={self.x: obs, self.z: returns})
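Note that this snippet passes a single list of arrays (`[obs, returns]`) to `iterate_minibatches` rather than separate positional arguments, so the helper assumed here is a slightly different variant. A minimal sketch, assuming all arrays share their first dimension:

import numpy as np

def iterate_minibatches(arrays, batch_size, shuffle=False):
    # yield aligned mini-batch slices from each array in `arrays`
    n = len(arrays[0])
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        excerpt = indices[start:start + batch_size]
        yield tuple(a[excerpt] for a in arrays)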
Example #5
    def training(self, source, num_epochs=50, logger=None):
        """ training procedure. Used to train a multiple output network.
        """

        if logger is None:
            logger = new_logger()

        logger.info("Starting training...")
        final_stats = {
                'source training loss': [], 'source training acc': [],
                'source valid loss': [], 'source valid acc': [],
                }

        for epoch in range(num_epochs):
            start_time = time.time()
            stats = { key:[] for key in final_stats.keys()}
            # training (forward and backward propagation)
            source_batches = iterate_minibatches(source['X_train'], source['y_train'], source['batchsize'], shuffle=True)
            for source_batch in source_batches:
                X, y = source_batch
                loss, acc = self.train_label(X, y)
                stats['source training loss'].append(loss)
                stats['source training acc'].append(acc*100)
                
            # Validation (forward propagation)
            source_batches = iterate_minibatches(source['X_val'], source['y_val'], source['batchsize'])
            for source_batch in source_batches:
                X, y = source_batch
                loss, acc = self.valid_label(X, y)
                stats['source valid loss'].append(loss)
                stats['source valid acc'].append(acc*100)

            logger.info("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            for stat_name, stat_value in sorted(stats.items()):
                if stat_value:
                    mean_value = np.mean(stat_value)
                    logger.info('   {:30} : {:.6f}'.format(
                        stat_name, mean_value))
                    final_stats[stat_name].append(mean_value)

        return final_stats
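The `source` argument is a plain dict; its expected keys can be read off the function body. A hypothetical call with placeholder data (the array shapes and the class instance `net` are made up for illustration):

import numpy as np

rng = np.random.RandomState(0)
source = {
    'X_train': rng.rand(512, 20).astype('float32'),
    'y_train': rng.randint(0, 3, size=512).astype('int32'),
    'X_val': rng.rand(128, 20).astype('float32'),
    'y_val': rng.randint(0, 3, size=128).astype('int32'),
    'batchsize': 64,
}
stats = net.training(source, num_epochs=5)
print(stats['source valid acc'][-1])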
Example #6
def test(test_data, test_labels, batch_size, model, test_batch_num):
    accuracy=0.0
    keep_probs_values = [1.0 for i in range(len(model.keep_probs_values))]
    for batch in utils.iterate_minibatches(inputs=test_data, targets=test_labels, batchsize=batch_size):
        test_in, test_target = batch
        accuracy += model.sess.run(tf.reduce_mean(tf.cast(tf.equal(tf.argmax(model.output_layer,1), tf.argmax(model.y, 1)), tf.float32)),
                                feed_dict={model.x:test_in, model.y:test_target, model.keep_probs:keep_probs_values})
    print('accuracy: {}'.format(accuracy / test_batch_num))
    return accuracy/test_batch_num
Example #7
File: model.py Project: vyraun/deepcode
def _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc):
    # a full pass over the validation data:
    val_err = 0.0
    val_acc = 0.0
    val_batches = 0
    for batch in utils.iterate_minibatches(X_val, next_problem_val, truth_val, batchsize, shuffle=False):
        X_, next_problem_, truth_ = batch
        err, acc = compute_cost_acc(X_, next_problem_, truth_)
        val_err += err
        val_acc += acc
        val_batches += 1
    val_loss = val_err/val_batches
    val_acc = val_acc/val_batches * 100
    return val_loss, val_acc
Example #8
def infer(data_filepath='data/flowers.hdf5',
          z_dim=128,
          out_dir='gan',
          n_steps=10):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_a, emb_b = val_data[1]
    txts = val_data[2]

    # add batch dimension
    emb_a, emb_b = emb_a[None, :], emb_b[None, :]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False

    # add and subtract
    emb_add = (emb_a + emb_b)
    emb_a_sub_b = (emb_a - emb_b)
    emb_b_sub_a = (emb_b - emb_a)

    # generate images
    fake_a = G.predict([z, emb_a])[0]
    fake_b = G.predict([z, emb_b])[0]
    fake_add = G.predict([z, emb_add])[0]
    fake_a_sub_b = G.predict([z, emb_a_sub_b])[0]
    fake_b_sub_a = G.predict([z, emb_b_sub_a])[0]

    fake_a = ((fake_a + 1) * 0.5)
    fake_b = ((fake_b + 1) * 0.5)
    fake_add = ((fake_add + 1) * 0.5)
    fake_a_sub_b = ((fake_a_sub_b + 1) * 0.5)
    fake_b_sub_a = ((fake_b_sub_a + 1) * 0.5)

    plt.imsave("{}/fake_text_arithmetic_a".format(out_dir), fake_a)
    plt.imsave("{}/fake_text_arithmetic_b".format(out_dir), fake_b)
    plt.imsave("{}/fake_text_arithmetic_add".format(out_dir), fake_add)
    plt.imsave("{}/fake_text_arithmetic_a_sub_b".format(out_dir), fake_a_sub_b)
    plt.imsave("{}/fake_text_arithmetic_b_sub_a".format(out_dir), fake_b_sub_a)
    print(str(txts[0]),
          str(txts[1]),
          file=open("{}/fake_text_arithmetic.txt".format(out_dir), "a"))
Example #9
def calc_validation_loss(sess, loss, accuracy, input_seq, output_seq, X_val, y_val):
    '''
    Calculate validation loss on the entire validation set
    '''
    val_accuracy, val_loss, val_batches = 0., 0., 0
    batch_size = min(config.val_batch_size, X_val.shape[0])
    for (inputs, targets) in utils.iterate_minibatches(X_val, y_val, batchsize=batch_size):

        batch_loss, batch_accuracy = sess.run([loss, accuracy], feed_dict={input_seq: inputs, output_seq: targets})
        val_batches += 1
        val_loss += batch_loss
        val_accuracy += batch_accuracy

    val_loss /= val_batches
    val_accuracy /= val_batches
    return val_loss, val_accuracy
Example #10
File: model.py Project: vyraun/deepcode
def check_accuracy(data, compute_cost_acc, dataset_name='test', batchsize=32):
    X_test, next_problem_test, truth_test = data
    print("Testing...")
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in utils.iterate_minibatches(X_test, next_problem_test, truth_test, batchsize, shuffle=False):
        X_, next_problem_, truth_ = batch
        err, acc = compute_cost_acc(X_, next_problem_, truth_)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  {} loss:\t\t\t{:.6f}".format(dataset_name, test_err / test_batches))
    print("  {} accuracy:\t\t{:.2f} %".format(dataset_name, test_acc / test_batches * 100))
Example #11
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan',
          n_samples=5):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, n_samples))    
    emb, txts = val_data[1], val_data[2]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(n_samples, z_dim))

    G.trainable = False
    fake_images = G.predict([z, emb])
    for i in range(n_samples):
        img = ((fake_images[i] + 1)*0.5)
        plt.imsave("{}/fake_{}".format(out_dir, i), img)
        print(i, str(txts[i]).strip(),
              file=open("{}/fake_text.txt".format(out_dir), "a"))
Example #12
File: core.py Project: gntoni/deepNet
 def _run_epoch(self, X, y, batchsize, training=False):
     """
     Function that takes a pair of input data and labels, splits i them
     into minibatches and pass them through the network.
     If training (training = True), parameters of the network will be
     updated.
     
     Args:
         X (ndarray): Input data
         y (ndarray): Labels
         batchsize (TYPE): Size of the desired minibatches
         training (bool, optional): If true, updates of the network
                 parameters with Stochastic Gradient descend will be
                 performed after each iteration.
     
     Returns:
         (float, float): Average Error and Average Accuracy
                 When training only error is returned (Accuracy = None)
     """
     err = 0
     acc = 0
     batches = 0
     for batch in tqdm(iterate_minibatches(
                                           X,
                                           y,
                                           batchsize,
                                           shuffle=training),
                       total=len(X)/batchsize):
         inputs, targets = batch
         inputs = np.asarray(inputs)
         targets = np.asarray(targets)
         
         if training:
             err += self._train_fn(inputs, targets)
         else:
             verr, vacc = self._val_fn(inputs, targets)
             err += verr
             acc += vacc
         batches += 1
     if training:
         return (err/batches, None)
     else:
         return (err/batches, (acc/batches)*100)
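Following the docstring, a hypothetical train/validate loop around `_run_epoch` could look like this (`net` stands in for an instance of the surrounding class, and the arrays are placeholder data):

import numpy as np

X_train = np.random.rand(1000, 1, 28, 28).astype('float32')
y_train = np.random.randint(0, 10, size=1000).astype('int32')
X_val = np.random.rand(200, 1, 28, 28).astype('float32')
y_val = np.random.randint(0, 10, size=200).astype('int32')

for epoch in range(10):
    train_err, _ = net._run_epoch(X_train, y_train, batchsize=128, training=True)
    val_err, val_acc = net._run_epoch(X_val, y_val, batchsize=128, training=False)
    print('epoch %d: train err %.4f, val err %.4f, val acc %.2f%%'
          % (epoch, train_err, val_err, val_acc))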
Example #13
 def compute_feature(X, Y, batchsize=batchsize, shuffle=False):
     out = np.zeros((len(Y), 4096))
     batch_id = 0
     for batch in iterate_minibatches(X, Y, batchsize, shuffle=False):
         inputs, _ = batch
         # Flip random half of the batch
         flip_idx = np.random.choice(len(inputs), size=len(inputs) // 2, replace=False)
         if len(flip_idx)>1:
             inputs[flip_idx] = inputs[flip_idx,:,:,::-1]
         # Subtract mean image
         inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) 
         # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
         if len(inputs)==batchsize:
             out[batch_id*batchsize : (batch_id+1)*batchsize] = feat_fn(inputs)
             batch_id += 1
         else:
             out[batch_id*batchsize : ] = feat_fn(inputs)
             
     return out
Example #14
File: model.py Project: vyraun/deepcode
def train(train_data, val_data, train_acc_fn, compute_cost_acc, num_epochs=5, batchsize=32):
    
    X_train, next_problem_train, truth_train = train_data
    X_val, next_problem_val, truth_val = val_data
    print("Starting training...")
    # We iterate over epochs:
    train_accuracies = []
    val_accuracies = []
    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0.0
        train_acc = 0.0
        train_batches = 0
        start_time = time.time()
        for batch in utils.iterate_minibatches(X_train, next_problem_train, truth_train, batchsize, shuffle=False):
            X_, next_problem_, truth_ = batch
            err, acc = train_acc_fn(X_, next_problem_, truth_)
            train_err += err
            train_acc += acc
            train_batches += 1
            val_loss, val_acc = _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc)
            print("  Epoch {} \tbatch {} \tloss {} \ttrain acc {:.2f} \tval acc {:.2f} ".format(epoch, train_batches, err, acc * 100, val_acc) )
        train_acc = train_acc/train_batches * 100
        train_accuracies.append(train_acc)
        train_loss = train_err/train_batches
        train_losses.append(train_loss)

        val_loss, val_acc = _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_loss))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc))
        print("  validation loss:\t\t{:.6f}".format(val_loss))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc))

    print("Training completed.")
    return train_losses, train_accuracies, val_accuracies
Example #15
    def optimize_policy(self, sess, samples, logger=None, **args):

        obs = samples['observations']
        actions = samples['actions']
        advantages = samples['advantages']
        dist_vars = [samples['infos'][k] for k in self.dist.keys()]

        inputs = [obs, actions, advantages] + dist_vars
        feed_dict = dict(list(zip(self.inputs_tensors, inputs)))
        if self.batch_size is not None and obs.shape[0] >= self.batch_size:
            for vs in iterate_minibatches(inputs,
                                          self.batch_size,
                                          shuffle=True):
                sess.run(self.train_op,
                         feed_dict=dict(list(zip(self.inputs_tensors, vs))))
        else:
            sess.run(self.train_op, feed_dict=feed_dict)

        if logger:
            summary_str = sess.run(self.summary_op, feed_dict=feed_dict)
            logger.add_summary(summary_str)
Example #16
def infer(data_filepath='data/flowers.hdf5',
          z_dim=128,
          out_dir='gan',
          n_steps=10):

    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_source, emb_target = val_data[1]
    txts = val_data[2]

    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False
    for i in range(n_steps + 1):
        p = i / float(n_steps)
        emb = emb_source * (1 - p) + emb_target * p
        emb = emb[None, :]
        fake_image = G.predict([z, emb])[0]
        img = ((fake_image + 1) * 0.5)
        plt.imsave("{}/fake_text_interpolation_i{}".format(out_dir, i), img)
        print(i,
              str(txts[int(round(p))]).strip(),
              file=open("{}/fake_text_interpolation.txt".format(out_dir), "a"))
Example #17
def train(noise_dim, gen_lr, disc_lr, batch_size, num_epochs, save_every,
          tensorboard_vis):
    """Trains the Deep Convolutional Generative Adversarial Network (DCGAN).

    See https://arxiv.org/abs/1511.06434 for more details.

    Args: optional arguments [python train.py --help]
    """
    # Load Dataset.
    logging.info('loading LFW dataset into memory')
    X, IMAGE_SHAPE = load_dataset(dimx=36, dimy=36)

    tf.reset_default_graph()
    try:
        if not tf.test.is_gpu_available(cuda_only=True):
            raise Exception
    except Exception:
        logging.critical('CUDA capable GPU device not found.')
        exit(0)

    logging.warning('constructing graph on GPU')
    with tf.device('/gpu:0'):

        # Define placeholders for input data.
        noise = tf.placeholder('float32', [None, noise_dim])
        real_data = tf.placeholder('float32', [
            None,
        ] + list(IMAGE_SHAPE))

        # Create Generator and Discriminator models.
        logging.debug('creating generator and discriminator')
        g_out = generator(noise, train=True)
        d_probs, d_fake_logits = discriminator(g_out, train=True)
        d_probs2, d_real_logits = discriminator(real_data, train=True)

        logging.debug('defining training ops')
        # Define Generator(G) ops.
        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_fake_logits, labels=tf.ones_like(d_fake_logits)))
        g_optimizer = tf.train.AdamOptimizer(learning_rate=gen_lr)
        g_vars = get_vars_by_scope('generator')
        g_train_step = g_optimizer.minimize(g_loss, var_list=g_vars)

        # Define Discriminator(D) ops.
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_real_logits, labels=tf.ones_like(d_real_logits)))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_fake_logits, labels=tf.zeros_like(d_fake_logits)))
        d_loss = d_loss_real + d_loss_fake
        d_optimizer = tf.train.AdamOptimizer(learning_rate=disc_lr)
        d_vars = get_vars_by_scope('discriminator')
        d_train_step = d_optimizer.minimize(d_loss, var_list=d_vars)

    with tf.Session() as sess:
        # Init vars.
        sess.run(tf.global_variables_initializer())

        # Start training.
        logging.debug('training DCGAN model')
        for epoch in range(num_epochs):
            eval_noise = sample_noise_batch(16)
            idx = np.random.choice(range(X.shape[0]), size=16)
            eval_real_data = X[idx]
            for X_batch in tqdm(iterate_minibatches(X,
                                                    batch_size,
                                                    shuffle=True),
                                total=X.shape[0] // batch_size,
                                desc='Epoch[{}/{}]'.format(
                                    epoch + 1, num_epochs),
                                leave=False):
                sess.run([d_train_step],
                         feed_dict={
                             real_data: X_batch,
                             noise: sample_noise_batch(batch_size)
                         })
                for _ in range(2):
                    sess.run([g_train_step],
                             feed_dict={noise: sample_noise_batch(batch_size)})
            # Evaluating model after every epoch.
            d_loss_iter, g_loss_iter, eval_images = sess.run(
                [d_loss, g_loss, g_out],
                feed_dict={
                    real_data: eval_real_data,
                    noise: eval_noise
                })
            # Generate images using G and save in `out/`.
            tl.visualize.save_images(eval_images, [4, 4],
                                     'out/eval_{}.png'.format(epoch + 1))
            logging.info(
                'Epoch[{}/{}]    g_loss: {:.6f}   -   d_loss: {:.6f}'.format(
                    epoch + 1, num_epochs, g_loss_iter, d_loss_iter))
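The helpers `sample_noise_batch` and `get_vars_by_scope` used above are not shown on this page. Minimal sketches, assuming noise is drawn uniformly from [-1, 1] (as in the generator examples elsewhere on this page) and that the generator/discriminator variables live under 'generator' and 'discriminator' name scopes; the project's own definitions may differ:

import numpy as np
import tensorflow as tf

NOISE_DIM = 256  # hypothetical default; the real value is the `noise_dim` argument of train()

def sample_noise_batch(batch_size, noise_dim=NOISE_DIM):
    # uniform noise matching the `noise` placeholder shape [batch_size, noise_dim]
    return np.random.uniform(-1., 1., size=(batch_size, noise_dim)).astype('float32')

def get_vars_by_scope(scope_name):
    # collect trainable variables whose names start with the given scope
    return [v for v in tf.trainable_variables() if v.name.startswith(scope_name)]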
Example #18
def comparison(X_train,
               y_train,
               X_val,
               y_val,
               X_test,
               y_test,
               kron_params=None):
    import pickle
    kron_params = [{
        'rank': p
    } for p in np.arange(2, 5, 1)] if kron_params is None else kron_params
    num_epochs = 5

    batch_size = 100

    hidden_units = [4 * 4]

    trains, accs = generate_train_acc(widths=hidden_units, type="dense")
    trains, accs = list(
        zip(*([(trains, accs)] + [
            generate_train_acc(
                widths=hidden_units, type="kron", params=kron_param)
            for kron_param in kron_params
        ] + [
            generate_train_acc(
                widths=hidden_units, type="uv_kron", params=kron_param)
            for kron_param in kron_params
        ])))

    names = ["dense"] + [
        "kron({})".format(p.values()) for p in kron_params
    ] + ["uv_kron({})".format(p.values()) for p in kron_params]
    results = {}

    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch = res["train_fun"](inputs,
                                                                    targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs,
                time.time() - start_time))

            print("  training loss (in-iteration):\t\t{:.6f}".format(
                train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(train_acc /
                                                         train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)
    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
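The results dict can be reloaded later from the pickle written above, e.g.:

import pickle

with open("comparative_history.dict", "rb") as pickle_file:
    history = pickle.load(pickle_file)
print(history["dense"]["val_acc"])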
Example #19
File: network.py Project: Lanuet/NNVLP
def train_model(num_data, batch_size, learning_rate, patience, decay_rate,
                X_train, Y_train, mask_train, C_train, X_dev, Y_dev, mask_dev,
                C_dev, X_test, Y_test, mask_test, C_test, input_var,
                target_var, mask_var, char_input_var, model, model_name,
                label_alphabet, output_dir):
    num_tokens = mask_var.sum(dtype=theano.config.floatX)
    energies_train = lasagne.layers.get_output(model)
    energies_eval = lasagne.layers.get_output(model, deterministic=True)
    loss_train = utils.crf_loss(energies_train, target_var, mask_var).mean()
    loss_eval = utils.crf_loss(energies_eval, target_var, mask_var).mean()
    _, corr_train = utils.crf_accuracy(energies_train, target_var)
    corr_train = (corr_train * mask_var).sum(dtype=theano.config.floatX)
    prediction_eval, corr_eval = utils.crf_accuracy(energies_eval, target_var)
    corr_eval = (corr_eval * mask_var).sum(dtype=theano.config.floatX)
    params = lasagne.layers.get_all_params(model, trainable=True)
    updates = lasagne.updates.momentum(loss_train,
                                       params=params,
                                       learning_rate=learning_rate,
                                       momentum=0.9)
    train_fn = theano.function(
        [input_var, target_var, mask_var, char_input_var],
        [loss_train, corr_train, num_tokens],
        updates=updates)
    eval_fn = theano.function(
        [input_var, target_var, mask_var, char_input_var],
        [loss_eval, corr_eval, num_tokens, prediction_eval])
    num_batches = num_data / batch_size
    num_epochs = 20
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' %
              (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        train_inst = 0
        start_time = time.time()
        num_back = 0
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train,
                                               Y_train,
                                               masks=mask_train,
                                               char_inputs=C_train,
                                               batch_size=batch_size,
                                               shuffle=True):
            inputs, targets, masks, char_inputs = batch
            err, corr, num = train_fn(inputs, targets, masks, char_inputs)
            train_err += err * inputs.shape[0]
            train_corr += corr
            train_total += num
            train_inst += inputs.shape[0]
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave
            sys.stdout.write("\b" * num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data, train_err
                / train_inst, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            num_back = len(log_info)
        # update training log after each epoch
        assert train_inst == num_data
        sys.stdout.write("\b" * num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' %
              (min(train_batches * batch_size,
                   num_data), num_data, train_err / num_data,
               train_corr * 100 / train_total, time.time() - start_time))
        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        dev_inst = 0
        for batch in utils.iterate_minibatches(X_dev,
                                               Y_dev,
                                               masks=mask_dev,
                                               char_inputs=C_dev,
                                               batch_size=batch_size):
            inputs, targets, masks, char_inputs = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks,
                                                  char_inputs)
            dev_err += err * inputs.shape[0]
            dev_corr += corr
            dev_total += num
            dev_inst += inputs.shape[0]
            utils.output_predictions(predictions,
                                     targets,
                                     masks,
                                     output_dir + '/dev%d' % epoch,
                                     label_alphabet,
                                     is_flattened=False)
        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' %
              (dev_err / dev_inst, dev_corr, dev_total,
               dev_corr * 100 / dev_total))
        if model_name != 'pos':
            input = open(output_dir + '/dev%d' % epoch)
            p1 = subprocess.Popen(shlex.split("perl conlleval.pl"),
                                  stdin=input)
            p1.wait()
        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch
            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            test_inst = 0
            for batch in utils.iterate_minibatches(X_test,
                                                   Y_test,
                                                   masks=mask_test,
                                                   char_inputs=C_test,
                                                   batch_size=batch_size):
                inputs, targets, masks, char_inputs = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks,
                                                      char_inputs)
                test_err += err * inputs.shape[0]
                test_corr += corr
                test_total += num
                test_inst += inputs.shape[0]
                utils.output_predictions(predictions,
                                         targets,
                                         masks,
                                         output_dir + '/test%d' % epoch,
                                         label_alphabet,
                                         is_flattened=False)
            np.savez('pre-trained-model/' + model_name + '/weights',
                     *lasagne.layers.get_all_param_values(model))
            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' %
                  (test_err / test_inst, test_corr, test_total,
                   test_corr * 100 / test_total))
            if model_name != 'pos':
                input = open(output_dir + '/test%d' % epoch)
                p1 = subprocess.Popen(shlex.split("perl conlleval.pl"),
                                      stdin=input)
                p1.wait()
            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr
        # stop if neither dev loss nor dev accuracy improved for `patience` consecutive epochs
        if stop_count == patience:
            break
        # re-compile the training function with the decayed learning rate
        lr = learning_rate / (1.0 + epoch * decay_rate)
        updates = lasagne.updates.momentum(loss_train,
                                           params=params,
                                           learning_rate=lr,
                                           momentum=0.9)
        train_fn = theano.function(
            [input_var, target_var, mask_var, char_input_var],
            [loss_train, corr_train, num_tokens],
            updates=updates)
    # print best performance on test data
    print("final best loss test performance (at epoch %d)" % (best_epoch_loss))
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' %
          (best_loss_test_err / test_inst, best_loss_test_corr, test_total,
           best_loss_test_corr * 100 / test_total))
    print("final best acc test performance (at epoch %d)" % (best_epoch_acc))
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' %
          (best_acc_test_err / test_inst, best_acc_test_corr, test_total,
           best_acc_test_corr * 100 / test_total))
Example #20
def train(images,
          labels,
          fold,
          model_type,
          batch_size,
          num_epochs,
          subj_id=0,
          reuse_cnn=False,
          dropout_rate=dropout_rate,
          learning_rate_default=1e-3,
          Optimizer=tf.train.AdamOptimizer,
          log_path=log_path):
    """
    A sample training function which loops over the training set and evaluates the network
    on the validation and test sets after each epoch, and on the training set once
    training has finished.
    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: number of epochs of dataset to go over for training
    :param subj_id: the id of fold for storing log and the best model
    :param reuse_cnn: whether to train cnn first, and load its weight for multi-frame model
    :return: [last_train_acc, best_validation_accu, test_acc_val, last_val_acc, last_test_acc]
    """

    with tf.name_scope('Inputs'):
        input_var = tf.placeholder(tf.float32, [None, None, 32, 32, n_colors],
                                   name='X_inputs')
        target_var = tf.placeholder(tf.int64, [None], name='y_inputs')
        tf_is_training = tf.placeholder(tf.bool, None, name='is_training')

    num_classes = len(np.unique(labels))
    (X_train,
     y_train), (X_val, y_val), (X_test,
                                y_test) = reformatInput(images, labels, fold)

    print('Train set label and proportion:\t',
          np.unique(y_train, return_counts=True))
    print('Val   set label and proportion:\t',
          np.unique(y_val, return_counts=True))
    print('Test  set label and proportion:\t',
          np.unique(y_test, return_counts=True))

    print('The shape of X_train:\t', X_train.shape)
    print('The shape of X_val:\t', X_val.shape)
    print('The shape of X_test:\t', X_test.shape)

    print("Building model and compiling functions...")
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var,
                                        num_classes,
                                        train=tf_is_training,
                                        dropout_rate=dropout_rate,
                                        name='CNN_Conv1d' + '_sbj' +
                                        str(subj_id))
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var,
                                      num_classes,
                                      100,
                                      train=tf_is_training,
                                      dropout_rate=dropout_rate,
                                      name='CNN_LSTM' + '_sbj' + str(subj_id))
    elif model_type == 'mix':
        network = build_convpool_mix(input_var,
                                     num_classes,
                                     100,
                                     train=tf_is_training,
                                     dropout_rate=dropout_rate,
                                     name='CNN_Mix' + '_sbj' + str(subj_id))
    elif model_type == 'cnn':
        with tf.name_scope(name='CNN_layer' + '_fold' + str(subj_id)):
            network = build_cnn(input_var)  # output shape [None, 4, 4, 128]
            convpool_flat = tf.reshape(network, [-1, 4 * 4 * 128])
            h_fc1_drop1 = tf.layers.dropout(convpool_flat,
                                            rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_1')
            h_fc1 = tf.layers.dense(h_fc1_drop1,
                                    256,
                                    activation=tf.nn.relu,
                                    name='fc_relu_256')
            h_fc1_drop2 = tf.layers.dropout(h_fc1,
                                            rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_2')
            network = tf.layers.dense(h_fc1_drop2,
                                      num_classes,
                                      name='fc_softmax')
            # the loss function contains the softmax activation
    else:
        raise ValueError(
            "Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    Train_vars = tf.trainable_variables()

    prediction = network

    with tf.name_scope('Loss'):
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in Train_vars if 'kernel' in v.name])
        ce_loss = tf.losses.sparse_softmax_cross_entropy(labels=target_var,
                                                         logits=prediction)
        _loss = ce_loss + weight_decay * l2_loss

    # decay_steps learning rate decay
    decay_steps = 3 * (
        len(y_train) // batch_size
    )  # len(y_train) // batch_size: the number of training steps per epoch
    with tf.name_scope('Optimizer'):
        # learning_rate = learning_rate_default * Decay_rate^(global_steps/decay_steps)
        global_steps = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(  # learning rate decay
            learning_rate_default,  # Base learning rate.
            global_steps,
            decay_steps,
            0.95,  # Decay rate.
            staircase=True)
        optimizer = Optimizer(
            learning_rate)  # GradientDescentOptimizer  AdamOptimizer
        train_op = optimizer.minimize(_loss,
                                      global_step=global_steps,
                                      var_list=Train_vars)

    with tf.name_scope('Accuracy'):
        prediction = tf.argmax(prediction, axis=1)
        correct_prediction = tf.equal(prediction, target_var)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Output directory for models and summaries
    # choose different path for different model and subject
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, log_path,
                     (model_type + '_' + str(subj_id))))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss, accuracy and learning_rate
    loss_summary = tf.summary.scalar('loss', _loss)
    acc_summary = tf.summary.scalar('train_acc', accuracy)
    lr_summary = tf.summary.scalar('learning_rate', learning_rate)

    # Train Summaries
    train_summary_op = tf.summary.merge(
        [loss_summary, acc_summary, lr_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                 tf.get_default_graph())

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                               tf.get_default_graph())

    # Test summaries
    test_summary_op = tf.summary.merge([loss_summary, acc_summary])
    test_summary_dir = os.path.join(out_dir, "summaries", "test")
    test_summary_writer = tf.summary.FileWriter(test_summary_dir,
                                                tf.get_default_graph())

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, model_type)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    if model_type != 'cnn' and reuse_cnn:
        # saver for reuse the CNN weight
        reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='VGG_NET_CNN')
        original_saver = tf.train.Saver(
            reuse_vars)  # Pass the variables as a list

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    print("Starting training...")
    total_start_time = time.time()
    best_validation_accu = 0

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        if model_type != 'cnn' and reuse_cnn:
            cnn_model_path = os.path.abspath(
                os.path.join(os.path.curdir, log_path, ('cnn_' + str(subj_id)),
                             'checkpoints'))
            cnn_model_path = tf.train.latest_checkpoint(cnn_model_path)
            print('-' * 20)
            print('Load cnn model weight for multi-frame model from {}'.format(
                cnn_model_path))
            original_saver.restore(sess, cnn_model_path)

        stop_count = 0  # count for earlystopping
        for epoch in range(num_epochs):
            print('-' * 50)
            # Train set
            train_err = train_acc = train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train,
                                             y_train,
                                             batch_size,
                                             shuffle=False):
                inputs, targets = batch
                summary, _, pred, loss, acc = sess.run(
                    [train_summary_op, train_op, prediction, _loss, accuracy],
                    {
                        input_var: inputs,
                        target_var: targets,
                        tf_is_training: True
                    })
                train_acc += acc
                train_err += loss
                train_batches += 1
                train_summary_writer.add_summary(summary,
                                                 sess.run(global_steps))

            av_train_err = train_err / train_batches
            av_train_acc = train_acc / train_batches

            # Val set
            summary, pred, av_val_err, av_val_acc = sess.run(
                [dev_summary_op, prediction, _loss, accuracy], {
                    input_var: X_val,
                    target_var: y_val,
                    tf_is_training: False
                })
            dev_summary_writer.add_summary(summary, sess.run(global_steps))

            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs,
                time.time() - start_time))

            fmt_str = "Train \tEpoch [{:d}/{:d}]  train_Loss: {:.4f}\ttrain_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_train_err,
                                       av_train_acc * 100)
            print(print_str)

            fmt_str = "Val \tEpoch [{:d}/{:d}]  val_Loss: {:.4f}\tval_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_val_err,
                                       av_val_acc * 100)
            print(print_str)

            # Test set
            summary, pred, av_test_err, av_test_acc = sess.run(
                [test_summary_op, prediction, _loss, accuracy], {
                    input_var: X_test,
                    target_var: y_test,
                    tf_is_training: False
                })
            test_summary_writer.add_summary(summary, sess.run(global_steps))

            fmt_str = "Test \tEpoch [{:d}/{:d}]  test_Loss: {:.4f}\ttest_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_test_err,
                                       av_test_acc * 100)
            print(print_str)

            if av_val_acc > best_validation_accu:  # early stopping
                stop_count = 0
                early_stopping_epoch = epoch
                best_validation_accu = av_val_acc
                test_acc_val = av_test_acc
                saver.save(sess,
                           checkpoint_prefix,
                           global_step=sess.run(global_steps))
            else:
                stop_count += 1
                if stop_count >= 10:  # stop training if val_acc does not improve for 10 consecutive epochs
                    break

        train_batches = train_acc = 0
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch
            acc = sess.run(accuracy, {
                input_var: inputs,
                target_var: targets,
                tf_is_training: False
            })
            train_acc += acc
            train_batches += 1

        last_train_acc = train_acc / train_batches

        last_val_acc = av_val_acc
        last_test_acc = av_test_acc
        print('-' * 50)
        print('Time in total:', time.time() - total_start_time)
        print("Best validation accuracy:\t\t{:.2f} %".format(
            best_validation_accu * 100))
        print(
            "Test accuracy when got the best validation accuracy:\t\t{:.2f} %".
            format(test_acc_val * 100))
        print('-' * 50)
        print("Last train accuracy:\t\t{:.2f} %".format(last_train_acc * 100))
        print("Last validation accuracy:\t\t{:.2f} %".format(last_val_acc *
                                                             100))
        print("Last test accuracy:\t\t\t\t{:.2f} %".format(last_test_acc *
                                                           100))
        print('Early stopping at epoch: {}'.format(early_stopping_epoch + 1))

    train_summary_writer.close()
    dev_summary_writer.close()
    test_summary_writer.close()
    return [
        last_train_acc, best_validation_accu, test_acc_val, last_val_acc,
        last_test_acc
    ]
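A hypothetical driver for this function, following the parameter descriptions in the docstring (the array shapes, the number of colors, and the fold indices below are placeholders):

import numpy as np

images = np.random.rand(200, 7, 32, 32, 3).astype('float32')  # [samples, frames, 32, 32, n_colors]
labels = np.random.randint(0, 4, size=200)
fold = (np.arange(0, 160), np.arange(160, 200))  # (train indices, test indices)

accs = train(images, labels, fold, model_type='cnn',
             batch_size=32, num_epochs=10, subj_id=0, reuse_cnn=False)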
Example #21
def comparison(X_train,y_train,X_val,y_val,X_test,y_test, kron_params=None):
    import pickle
    kron_params = [{'rank': p} for p in np.arange(2, 5, 1)] if kron_params is None else kron_params
    num_epochs = 5

    batch_size = 100

    hidden_units = [4*4]

    trains, accs = generate_train_acc(widths=hidden_units, type="dense")
    trains, accs = list(zip(*([(trains, accs)]
                              + [generate_train_acc(widths=hidden_units, type="kron", params=kron_param) for kron_param in kron_params]
                              + [generate_train_acc(widths=hidden_units, type="uv_kron", params=kron_param) for kron_param in kron_params])))

    names = ["dense"] + ["kron({})".format(p.values()) for p in kron_params] + ["uv_kron({})".format(p.values()) for p in kron_params]
    results = {}

    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train,batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch= res["train_fun"](inputs, targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))

            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(
                train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)
    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
Example #22
def run(X_train,y_train,X_val,y_val,X_test,y_test):
    import pickle
    import cProfile
    kron_params = [{'param_density': p} for p in np.linspace(0.0, 0.0, 1, endpoint=False)]
    num_epochs = 5

    batch_size = 100

    hidden_units = [100**2]

    trains, accs = list(zip(*([generate_train_acc(widths=hidden_units, type="old_kron", params=kron_param) for kron_param in kron_params])))

    names = ["old_kron({})".format(p.values()) for p in kron_params]
    results = {}

    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    # Just profile if you need
    pr = cProfile.Profile()
    pr.enable()
    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train,batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch= res["train_fun"](inputs, targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))

            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(
                train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)
    # Just profile if you need
    pr.disable()
    pr.print_stats(sort='cumtime')
    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
Example #23
def train(num_epochs, batch_size, learning_rate, tensorboard_vis):

    X_train, X_val, X_test, Y_train, Y_val, Y_test = load_dataset()
    # X_train, Y_train = np.random.random(size=(1000, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(1000, 1)).astype(np.float32)
    # X_test, Y_test = np.random.random(size=(200, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(200, 1)).astype(np.float32)
    # X_val, Y_val = np.random.random(size=(100, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(100, 1)).astype(np.float32)

    print("number of training examples = " + str(X_train.shape[0]))
    print("number of test examples = " + str(X_test.shape[0]))
    print("X_train shape: " + str(X_train.shape))
    print("Y_train shape: " + str(Y_train.shape))
    print("X_test shape: " + str(X_test.shape))
    print("Y_test shape: " + str(Y_test.shape))

    num_examples = X_train.shape[0]
    input_shape = (None, ) + tuple(X_train.shape[1:])
    timestamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())

    tf.reset_default_graph()

    image_data = tf.placeholder(dtype=tf.float32,
                                shape=input_shape,
                                name='image_data')
    targets = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='targets')
    keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')

    with tf.variable_scope('zero_pad') as scope:
        zero_pad = tf.pad(image_data, [[0, 0], [3, 3], [3, 3], [0, 0]],
                          name=scope.name)

    with tf.variable_scope('conv1') as scope:
        kernel = tf.get_variable('kernel',
                                 shape=[7, 7, 3, 32],
                                 initializer=tf.random_uniform_initializer(),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(zero_pad,
                            filter=kernel,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        bn = tf.layers.batch_normalization(conv)
        relu = tf.nn.relu(bn)
        dropout = tf.nn.dropout(relu, keep_prob=keep_prob)
        conv1 = tf.nn.max_pool(dropout,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name=scope.name)

    with tf.variable_scope('logits') as scope:
        dim = np.prod(conv1.get_shape().as_list()[1:])
        flatten = tf.reshape(conv1, shape=[-1, dim])
        weights = tf.get_variable('weights',
                                  shape=[dim, 1],
                                  initializer=tf.random_uniform_initializer(),
                                  dtype=tf.float32)
        bias = tf.get_variable('bias',
                               shape=[1],
                               initializer=tf.constant_initializer(0.0),
                               dtype=tf.float32)
        dense = tf.add(tf.matmul(flatten, weights), bias)
        # `dense` holds the raw logits; sigmoid_cross_entropy applies the sigmoid
        # internally, so the sigmoid output is kept only for thresholded predictions
        probs = tf.nn.sigmoid(dense, name=scope.name)

    loss = tf.losses.sigmoid_cross_entropy(targets, logits=dense)
    predictions = tf.cast(tf.greater(probs, 0.5), dtype=tf.float32)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predictions, targets), dtype=tf.float32))

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_step = optimizer.minimize(loss)

    if tensorboard_vis:
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        summaries = tf.summary.merge_all()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver(tf.trainable_variables())

    n_steps = num_examples // batch_size
    if not num_examples % batch_size == 0:
        n_steps += 1

    if tensorboard_vis:
        train_writer = tf.summary.FileWriter('logs/train', sess.graph)
        val_writer = tf.summary.FileWriter('logs/val', sess.graph)

    for epoch in range(num_epochs):
        # Training
        train_losses, train_accuracies, n_iter = [], [], 0
        for image_batch, label_batch in tqdm(
                iterate_minibatches(X_train,
                                    Y_train,
                                    batchsize=batch_size,
                                    shuffle=True),
                total=n_steps,
                desc='Epoch {}/{}'.format(epoch, num_epochs)):
            if tensorboard_vis:
                _, train_loss, train_acc, summary = sess.run(
                    [train_step, loss, accuracy, summaries],
                    feed_dict={
                        image_data: image_batch,
                        targets: label_batch,
                        keep_prob: 0.5
                    })
            else:
                _, train_loss, train_acc = sess.run(
                    [train_step, loss, accuracy],
                    feed_dict={
                        image_data: image_batch,
                        targets: label_batch,
                        keep_prob: 0.5
                    })
            if tensorboard_vis and n_iter == 0:
                # log one training summary per epoch (use the epoch as the global step)
                train_writer.add_summary(summary, epoch)
                train_writer.flush()

            train_losses.append(train_loss)
            train_accuracies.append(train_acc)
            n_iter += 1

        avg_train_loss = np.mean(train_losses)
        avg_train_acc = np.mean(train_accuracies)

        # Validation
        val_losses, val_accuracies, n_iter = [], [], 0
        for image_batch, label_batch in iterate_minibatches(
                X_val, Y_val, batchsize=batch_size, shuffle=True):
            if tensorboard_vis:
                val_loss, val_acc, summary = sess.run(
                    [loss, accuracy, summaries],
                    feed_dict={
                        image_data: image_batch,
                        targets: label_batch,
                        keep_prob: 1.0
                    })
            else:
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 image_data: image_batch,
                                                 targets: label_batch,
                                                 keep_prob: 1.0
                                             })
            if tensorboard_vis and n_iter == 0:
                # log the evaluated `summary` (not the `summaries` op), once per epoch
                val_writer.add_summary(summary, epoch)
                val_writer.flush()
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            n_iter += 1

        avg_val_loss = np.mean(val_losses)
        avg_val_acc = np.mean(val_accuracies)
        print(
            'Epoch {}/{}: train loss: {:.4f} train acc: {:.4f} val loss: {:.4f} val acc: {:.4f}'
            .format(epoch + 1, num_epochs, avg_train_loss, avg_train_acc,
                    avg_val_loss, avg_val_acc))
        # save model checkpoint
        saver.save(sess,
                   'models/{}/model.ckpt'.format(timestamp),
                   global_step=epoch)

    # Testing
    test_losses, test_accuracies, n_iter = [], [], 0
    for image_batch, label_batch in iterate_minibatches(X_test,
                                                        Y_test,
                                                        batchsize=batch_size,
                                                        shuffle=True):
        test_loss, test_acc = sess.run([loss, accuracy],
                                       feed_dict={
                                           image_data: image_batch,
                                           targets: label_batch,
                                           keep_prob: 1.0
                                       })
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)
        n_iter += 1

    avg_test_loss = np.mean(test_losses)
    avg_test_acc = np.mean(test_accuracies)
    print('Test Loss: {:.4f} Test Accuracy: {:.4f}'.format(
        avg_test_loss, avg_test_acc))

    sess.close()
Example #24
# With our vocabulary in place, we still need a method that converts a whole sentence into a sequence of IDs.
# To speed up training we would also like to get a so-called mini-batch at a time: multiple such sequences together.
# So our function takes a corpus iterator and a vocabulary, and returns a mini-batch of shape [Batch, Time],
# where the first dimension indexes the sentences in the batch and the second the time steps in each sentence.
# (An illustrative sketch of such helpers is shown right after the import below.)

# In[19]:

from utils import iterate_minibatches, prepare_data
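
# Note: the real implementations live in utils and are imported above. The sketch below is
# purely illustrative -- the names, the signatures, the `get_token_id` vocabulary method and
# the use of ID 0 as padding are assumptions, not the actual utils code -- but it shows the
# idea: group sentences into batches and pad each ID sequence into a rectangular [Batch, Time] array.

import numpy as np

def iterate_minibatches_sketch(corpus, batch_size=4):
    """Yield lists of `batch_size` sentence pairs from a corpus iterator."""
    batch = []
    for pair in corpus:
        batch.append(pair)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:  # final, possibly smaller, batch
        yield batch

def prepare_data_sketch(batch, vocabulary_x, vocabulary_y, pad_id=0):
    """Convert a batch of (source, target) token sequences into padded ID matrices."""
    xs = [[vocabulary_x.get_token_id(t) for t in src] for src, _ in batch]
    ys = [[vocabulary_y.get_token_id(t) for t in trg] for _, trg in batch]
    x = np.full((len(batch), max(len(s) for s in xs)), pad_id, dtype="int64")
    y = np.full((len(batch), max(len(s) for s in ys)), pad_id, dtype="int64")
    for i, (sx, sy) in enumerate(zip(xs, ys)):
        x[i, :len(sx)] = sx
        y[i, :len(sy)] = sy
    return x, y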

# Let's try it out!

# In[20]:

src_reader = smart_reader(train_e_path)
trg_reader = smart_reader(train_f_path)
bitext = bitext_reader(src_reader, trg_reader)

for batch_id, batch in enumerate(iterate_minibatches(bitext, batch_size=4)):

    print("This is the batch of data that we will train on, as tokens:")
    pprint(batch)
    print()

    x, y = prepare_data(batch, vocabulary_e, vocabulary_f)

    print("These are our inputs (i.e. words replaced by IDs):")
    print(x)
    print()

    print("These are the outputs (the foreign sentences):")
    print(y)
    print()
        target_var, wordEmbeddings)
    """
    epsilon = 1.0e-7
    print ("Starting training...")
    best_val_acc = 0
    best_val_pearson = 0
    for epoch in range(args.epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(
            X1_train,
            X1_mask_train,
            X2_train,
            X2_mask_train,
            Y_labels_train,
            Y_scores_train,
            Y_scores_pred_train,
            args.minibatch,
            shuffle=True,
        ):

            inputs1, inputs1_mask, inputs2, inputs2_mask, labels, scores, scores_pred = batch

            if args.task == "sts":
                scores_pred = np.clip(scores_pred, epsilon, 1.0 - epsilon)
                train_err += train_fn(inputs1, inputs1_mask, inputs2, inputs2_mask, scores_pred)
                # train_err += train_fn(inputs1, inputs2, scores_pred)
            elif args.task == "ent":
                # labels = np.clip(labels, epsilon, 1.0 - epsilon)
                train_err += train_fn(inputs1, inputs1_mask, inputs2, inputs2_mask, labels)
def train(
    X_train,
    y_train,
    X_test,
    y_test,
    architecture,
    LABEL_1,
    LABEL_2,  # labels of the y.
    num_epochs=100,
    batchsize=5,
    dict_of_paths={
        'output': '1.txt',
        'picture': '1.png',
        'report': 'report.txt'
    },
    report='''trained next architecture, used some
                    optimization method with learning rate...'''):
    """
    Iterate minibatches on train subset and validate results on test subset.

    Parameters
    ----------
    X_train : numpy array
        X train subset.
    y_train : numpy array
        Y train subset.
    X_test : numpy array
        X test subset.
    y_test : numpy array
        Y test subset.
    LABEL_1 : {'AD', 'LMCI', 'EMCI', 'Normal'}
        String label for target == 0.
    LABEL_2 : {'AD', 'LMCI', 'EMCI', 'Normal'}
        String label for target == 1.
    dict_of_paths : dictionary
        Names of files to store results.
    report : string
        Some comments which will saved into report after ending of training.
    num_epochs : integer
        Number of epochs for all of the experiments. Default is 100.
    batchsize : integer
        Batchsize for network training. Default is 5.

    Returns
    -------
    tr_losses : numpy.array
        Array with loss values on train.
    val_losses : numpy.array
        Array with loss values on test.
    val_accs : numpy.array
        Array with accuracy values on test.
    rocs : numpy.array
        Array with roc auc values on test.

    """

    eps = []
    tr_losses = []
    val_losses = []
    val_accs = []
    rocs = []

    FILE_PATH = dict_of_paths['output']
    PICTURE_PATH = dict_of_paths['picture']
    REPORT_PATH = dict_of_paths['report']

    # per-epoch outputs (train and val losses, accuracy, AUC) are written to this file
    with open(FILE_PATH, 'w') as f:
        f.write('\n----------\n\n' + str(datetime.datetime.now())[:19])
        f.write('\n' + LABEL_1 + '-' + LABEL_2 + '\n')

    # starting training
    print("Starting training...")
    sys.stdout.flush()
    den = X_train.shape[0] / batchsize
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches_train(X_train,
                                               y_train,
                                               batchsize,
                                               shuffle=True):
            inputs, targets = batch
            history = architecture.fit(inputs, targets)
            train_err = train_err + np.mean(history.history['loss'])
            train_batches = train_batches + 1

        val_err = 0
        val_batches = 0
        preds = []
        targ = []
        for batch in iterate_minibatches(X_test,
                                         y_test,
                                         batchsize,
                                         shuffle=False):
            inputs, targets = batch
            err = architecture.evaluate(inputs, targets)
            val_err = val_err + np.mean(err)
            val_batches = val_batches + 1
            out = architecture.predict(inputs)
            preds.extend(out)
            targ.extend(targets)

        preds_tst = np.array(preds).argmax(axis=1)
        ##
        ## output
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        sys.stdout.flush()
        print("  training loss:\t\t{:.7f}".format(train_err / train_batches))
        sys.stdout.flush()
        print("  validation loss:\t\t{:.7f}".format(val_err / val_batches))
        sys.stdout.flush()
        print('  validation accuracy:\t\t{:.7f}'.format(
            accuracy_score(np.array(targ), preds_tst)))
        sys.stdout.flush()
        print('Confusion matrix for test:')
        sys.stdout.flush()
        print(confusion_matrix(np.array(targ), np.array(preds).argmax(axis=1)))
        sys.stdout.flush()
        rcs = roc_auc_score(np.array(targ), np.array(preds))
        sys.stderr.write('Pairwise ROC_AUCs: ' + str(rcs))
        print('')

        with open(FILE_PATH, 'a') as f:
            f.write("\nEpoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs,
                time.time() - start_time))
            f.write("\n training loss:\t\t{:.7f}".format(train_err /
                                                         train_batches))
            f.write("\n validation loss:\t\t{:.7f}".format(val_err /
                                                           val_batches))
            f.write('\n validation accuracy:\t\t{:.7f}'.format(
                accuracy_score(np.array(targ),
                               np.array(preds).argmax(axis=1))))

            f.write('\n Pairwise ROC_AUCs:' + str(rcs) + '\n')
        ## output
        ## saving results
        eps.append(epoch + 1)
        tr_losses.append(train_err / train_batches)
        val_losses.append(val_err / val_batches)
        val_accs.append(
            accuracy_score(np.array(targ),
                           np.array(preds).argmax(axis=1)))
        rocs.append(rcs)

    print('ended!')

    ### and save plots
    plt.figure(figsize=(15, 10))
    plt.subplot(2, 2, 1)
    plt.title('Loss ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylim((0, 3))
    plt.ylabel('Loss')
    plt.plot(eps, tr_losses, label='train')
    plt.plot(eps, val_losses, label='validation')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 2)
    plt.title('Accuracy ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.plot(eps, val_accs, label='validation accuracy')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 3)
    plt.title('AUC ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylabel('AUC')
    plt.plot(eps, np.array(rocs), label='validation auc')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 4)
    plt.title('architecture')
    plt.axis('off')
    plt.text(
        0,
        -0.1,
        architecture,
        fontsize=7,
    )
    plt.savefig(PICTURE_PATH)
    ###########

    # note in the output file that training has ended
    with open(FILE_PATH, 'a') as f:
        f.write('\nended at ' + str(datetime.datetime.now())[:19] + '\n \n')

    # write report
    with open(REPORT_PATH, 'a') as f:
        f.write('\n' + LABEL_1 + ' vs ' + LABEL_2 + '\n' + report)
        #         f.write(architecture)
        f.write('\nfinal results are:')
        f.write('\n tr_loss: ' + str(tr_losses[-1]) + '\n val_loss: ' + \
                str(val_losses[-1]) + '\n val_acc: ' + str(val_accs[-1]) + \
                '\n val_roc_auc: ' + str(rocs[-1]))
        f.write('\nresults have been saved in files:\n')
        f.write(FILE_PATH + '\n')
        f.write(PICTURE_PATH + '\n')
        f.write('\n ___________________ \n\n\n')

    return tr_losses, val_losses, val_accs, rocs
        # LOOP EPOCHS
        print('\tTrain model')
        for epoch in range(MAX_EPOCHS):
            print('\tEpoch: ' + str(epoch + 1) + ' of ' + str(MAX_EPOCHS))
            # down sample
            inputs_train, targets_train = u_s.down_sample(
                inputs_=inputs_train_ep,
                targets_=targets_train_ep,
                no_class=NUM_CLASSES)

            max_mini_batch = np.ceil(1 + len(inputs_train) / BATCH_SIZE)
            _iter = 1
            for x_batch, y_batch in utils.iterate_minibatches(
                    batchsize=BATCH_SIZE,
                    inputs=inputs_train,
                    targets=targets_train,
                    shuffle=True):
                #

                _, _loss, _acc = sess.run(
                    fetches=[train_model, cross_entropy, accuracy],
                    feed_dict={
                        x_pl: x_batch,
                        y_pl: y_batch
                    })
                #
                print("\t\tminibatch: %d ~ %d\tLOSS: %f\tACCs: %f" %
                      (_iter, max_mini_batch, _loss, _acc),
                      end='\r')
                _iter += 1
Example #28



profile = False
test_count = 0
first = True
Q_conv_count = 0
frozen_epoch = 0

for epoch in range(num_epochs):
    start_time = time.time()
    loss = 0
    err = 0
    Q = 0
    for batch in utils.iterate_minibatches(inputs=train_data_resized, targets=train_labels, batchsize=batch_size):
        train_in, train_target = batch
        #train_in = train_in[:,np.newaxis,:,np.newaxis]
        tmp_sum, loss_, err_, Q_ = model.train(train_in, train_target, profile)
        if profile:
            fetched_timeline = timeline.Timeline(model.run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('grcnn-timeline_01_step_0.json', 'w') as f:
                f.write(chrome_trace)        
        if first:
            # write the graph summary only once
            model.writer.add_summary(tmp_sum, epoch)
            first = False
        profile = False
        loss += loss_
        err += err_
        Q += Q_
        #print loss
Example #29
def training(trainers, train_data, testers=[], test_data=[], num_epochs=20, logger=None):
    """
    TODO : Explain the whole function

    Params
    ------
        trainers:
        train_data:
        testers: (default=[])
        test_data: (default=[])
        num_epochs: (default=20)
        logger: (default=None)

    Return
    ------
        stats: dict with stats
    """
    if logger is None:
        logger = empty_logger()

    logger.info("Starting training...")
    final_stats = {}
    final_stats.update({trainer.name+' training loss': [] for trainer in trainers})
    final_stats.update({trainer.name+' valid loss': [] for trainer in trainers})
    final_stats.update({tester.name+' valid loss': [] for tester in testers})

    final_stats.update({(trainer.name+str(i)+' training acc' if trainer.train.n_returned_outputs > 2
                            else trainer.name+' training acc'): []
        for trainer in trainers for i in range(trainer.train.n_returned_outputs-1)})
    final_stats.update({(trainer.name+str(i)+' valid acc' if trainer.train.n_returned_outputs > 2
                            else trainer.name+' valid acc'): []
        for trainer in trainers for i in range(trainer.train.n_returned_outputs-1)})
    final_stats.update({(tester.name+str(i)+' valid acc' if tester.train.n_returned_outputs > 2
                            else tester.name+' valid acc'): []
        for tester in testers for i in range(tester.train.n_returned_outputs-1)})
    # final_stats.update({trainer.name+' valid acc': [] for trainer in trainers})
    # final_stats.update({tester.name+' valid acc': [] for tester in testers})

    for epoch in range(num_epochs):
        # Prepare the statistics
        start_time = time.time()
        stats = { key:[] for key in final_stats.keys()}

        # Do some training preparations:
        for data, trainer in zip(train_data+test_data, trainers+testers):
            trainer.preprocess(data, trainer, epoch)

        # Training (forward and backward propagation):
        # one minibatch from each training set per step, the sets being walked in lockstep
        batches = tuple(iterate_minibatches(data['X_train'], data['y_train'], data['batchsize'], shuffle=True) 
                        for data in train_data)
        for minibatches in zip(*batches):
            for batch, trainer in zip(minibatches, trainers):
                # X, y = batch
                res = trainer.train(*batch)
                # the first returned value is the loss, the rest are accuracies
                loss, acc = res.pop(0), res
                stats[trainer.name+' training loss'].append(loss)
                # in the normal case, res holds a single accuracy
                if len(acc) == 1:
                    stats[trainer.name+' training acc'].append(acc[0]*100)
                else: # otherwise we have multiple accuracies
                    for i, a in enumerate(acc):
                        stats[trainer.name+str(i)+' training acc'].append(a*100)

        # Validation (forward propagation)
        # done with the iterative functions
        batches = tuple(iterate_minibatches(data['X_val'], data['y_val'], data['batchsize']) 
                        for data in train_data+test_data)
        for minibatches in zip(*batches):
            for batch, valider in zip(minibatches, trainers+testers):
                # X, y = batch
                res = valider.valid(*batch)
                # the first returned value is the loss, the rest are accuracies
                loss, acc = res.pop(0), res
                stats[valider.name+' valid loss'].append(loss)
                # in the normal case, res holds a single accuracy
                if len(acc) == 1:
                    stats[valider.name+' valid acc'].append(acc[0]*100)
                else: # otherwise we have multiple accuracies
                    for i, a in enumerate(acc):
                        stats[valider.name+str(i)+' valid acc'].append(a*100)
        
        logger.info("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        for stat_name, stat_value in sorted(stats.items()):
            if stat_value:
                mean_value = np.mean(stat_value)
                logger.info('   {:30} : {:.6f}'.format(
                    stat_name, mean_value))
                final_stats[stat_name].append(mean_value)

    return final_stats
def train(data_folderpath='data/edges2shoes', image_size=256, ndf=64, ngf=64,
          lr_d=2e-4, lr_g=2e-4, n_iterations=int(1e6),
          batch_size=64, iters_per_checkpoint=100, n_checkpoint_samples=16,
          reconstruction_weight=100, out_dir='gan'):

    logger = SummaryWriter(out_dir)
    logger.add_scalar('d_lr', lr_d, 0)
    logger.add_scalar('g_lr', lr_g, 0)

    data_iterator = iterate_minibatches(
        data_folderpath + "/train/*.jpg", batch_size, image_size)
    val_data_iterator = iterate_minibatches(
        data_folderpath + "/val/*.jpg", n_checkpoint_samples, image_size)
    img_ab_fixed, _ = next(val_data_iterator)
    img_a_fixed, img_b_fixed = img_ab_fixed[:, 0], img_ab_fixed[:, 1]

    img_a_shape = img_a_fixed.shape[1:]
    img_b_shape = img_b_fixed.shape[1:]
    patch = int(img_a_shape[0] / 2**4)  # n_layers
    disc_patch = (patch, patch, 1)
    print("img a shape ", img_a_shape)
    print("img b shape ", img_b_shape)
    print("disc_patch ", disc_patch)

    # plot the fixed real images for reference
    log_images(img_a_fixed, 'real_a', '0', logger)
    log_images(img_b_fixed, 'real_b', '0', logger)

    # build models
    D = build_discriminator(
        img_a_shape, img_b_shape, ndf, activation='sigmoid')
    G = build_generator(img_a_shape, ngf)

    # build model outputs
    img_a_input = Input(shape=img_a_shape)
    img_b_input = Input(shape=img_b_shape)

    fake_samples = G(img_a_input)
    D_real = D([img_a_input, img_b_input])
    D_fake = D([img_a_input, fake_samples])

    loss_reconstruction = partial(mean_absolute_error,
                                  real_samples=img_b_input,
                                  fake_samples=fake_samples)
    loss_reconstruction.__name__ = 'loss_reconstruction'

    # define D graph and optimizer
    G.trainable = False
    D.trainable = True
    D_model = Model(inputs=[img_a_input, img_b_input],
                    outputs=[D_real, D_fake])
    D_model.compile(optimizer=Adam(lr_d, beta_1=0.5, beta_2=0.999),
                    loss='binary_crossentropy')

    # define D(G(z)) graph and optimizer
    G.trainable = True
    D.trainable = False
    G_model = Model(inputs=[img_a_input, img_b_input],
                    outputs=[D_fake, fake_samples])
    G_model.compile(Adam(lr=lr_g, beta_1=0.5, beta_2=0.999),
                    loss=['binary_crossentropy', loss_reconstruction],
                    loss_weights=[1, reconstruction_weight])

    ones = np.ones((batch_size, ) + disc_patch, dtype=np.float32)
    zeros = np.zeros((batch_size, ) + disc_patch, dtype=np.float32)
    dummy = zeros

    for i in range(n_iterations):
        D.trainable = True
        G.trainable = False

        image_ab_batch, _ = next(data_iterator)
        loss_d = D_model.train_on_batch(
            [image_ab_batch[:, 0], image_ab_batch[:, 1]],
            [ones, zeros])

        D.trainable = False
        G.trainable = True
        image_ab_batch, _ = next(data_iterator)
        loss_g = G_model.train_on_batch(
            [image_ab_batch[:, 0], image_ab_batch[:, 1]],
            [ones, dummy])

        print("iter", i)
        if (i % iters_per_checkpoint) == 0:
            G.trainable = False
            fake_image = G.predict(img_a_fixed)
            log_images(fake_image, 'val_fake', i, logger)
            save_model(G, out_dir)

        log_losses(loss_d, loss_g, i, logger)
Example #31
	
	eps = []
	best_val_acc = 0

	print "Start training\n"	
	for epoch in range(num_epochs):
	    # Calculate epoch time
	    start_time = time.time()
	    
	    # Full pass training set
	    train_err = 0
	    train_batches = 0
	    confusion_train = ConfusionMatrix(n_class)
	    
	    # Generate minibatches and train on each one of them	
	    for batch in iterate_minibatches(X_tr, y_tr, mask_tr, batch_size, shuffle=True):
		inputs, targets, in_masks = batch
		tr_err, predict = train_fn(inputs, targets, in_masks)
		train_err += tr_err
		train_batches += 1
		preds = np.argmax(predict, axis=-1)
		confusion_train.batch_add(targets, preds)
	    
	    train_loss = train_err / train_batches
	    train_accuracy = confusion_train.accuracy()
	    cf_train = confusion_train.ret_mat()	    

		
	    # Full pass validation set
	    val_err = 0
	    val_batches = 0
Example #32
def main(reps, pretrained_w_path, do_module1, init_seed=0, load_t=0, num_epochs=200,
    batchsize=96, fine_tune=0, patience=500, lr_init = 1e-3, optim='adagrad', toy=0,
    num_classes=23):
    res_root = '/home/hoa/Desktop/projects/resources'
    X_path=osp.join(res_root, 'datasets/msrcv2/Xaug_b01c.npy')
    Y_path=osp.join(res_root, 'datasets/msrcv2/Y.npy')
    MEAN_IMG_PATH=osp.join(res_root, 'models/ilsvrc_2012_mean.npy')
    snapshot=50 # save model after every `snapshot` epochs
    
    drop_p=0.5 # drop out prob.
    lambda2=0.0005/2 # l2-regularizer constant    
    # step=patience/4 # decay learning after every `step` epochs
    lr_patience=60 # for learning rate schedule, if optim=='momentum'    
    if toy: # unit testing
        num_epochs=10
        data_multi=3
        reps = 2        
        #drop_p=0
        #lambda2=0
    
    # Create name tag for the experiment
    if fine_tune:
        full_or_tune = 'tune' # description tag for storing associated files
    else:
        full_or_tune = 'full'
    time_stamp=time.strftime("%y%m%d%H%M%S", time.localtime()) 
    snapshot_root = '../snapshot_models/'
    snapshot_name = str(num_classes)+'alex'+time_stamp+full_or_tune
    
    # LOADING DATA
    print 'LOADING DATA ...'
    X = np.load(X_path)
    Y = np.load(Y_path)
    if X.shape[1]!=3:
        X = b01c_to_bc01(X)
    N = len(Y)

    print 'Raw X,Y shape', X.shape, Y.shape
    if len(X) != len(Y):
        print 'Inconsistent number of input images and labels. X is possibly augmented.'
    
    MEAN_IMG = np.load(MEAN_IMG_PATH)
    MEAN_IMG_227 = skimage.transform.resize(
            np.swapaxes(np.swapaxes(MEAN_IMG,0,1),1,2), (227,227), mode='nearest', preserve_range=True)    
    MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227,1,2),0,1).reshape((1,3,227,227))

    all_metrics = [] # store metrics in each run
    time_profiles = {
    'train_module1': [],
    'train_module1_eff': [],
    'train_module2': [],
    'test': []
    } # record training and testing time
   
     # PREPARE THEANO EXPRESSION FOR BOTH MODULES
    print 'COMPILING THEANO EXPRESSION ...'
    input_var = T.tensor4('inputs')
    target_var = T.imatrix('targets')        
    network = build_model(num_classes=num_classes, input_var=input_var)    

    # Create a loss expression for training
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var) 
    weights = lasagne.layers.get_all_params(network, regularizable=True)
    l2reg = theano.shared(floatX(lambda2))*T.sum([T.sum(w ** 2) for w in weights])
    loss = loss.mean() + l2reg
    
    lr = theano.shared(np.array(lr_init, dtype=theano.config.floatX))
    lr_decay = np.array(1./3, dtype=theano.config.floatX)
    
    # Create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    # last-layer case is actually very simple:
    # `params` above is a list of all (W,b)-pairs
    # Therefore last layer's (W,b) is params[-2:]
    if fine_tune == 7: # tuning params from fc7 to fc8
        params = params[-2:] 
    # elif fine_tune == 6: # tuning params from fc6 to fc8
    #     params = params[-4:]
    # TODO adjust for per-layer training with local_lr
    
    if optim=='momentum':
        updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9) 
    elif optim=='rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr, rho=0.9, epsilon=1e-06) 
    elif optim=='adam':
        updates = lasagne.updates.adam(
            loss, params, learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif optim=='adagrad':
        updates = lasagne.updates.adagrad(loss, params, learning_rate=lr, epsilon=1e-06)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.binary_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean() + l2reg
    # zero-one loss with threshold t = 0.5 for reference
    # zero_one_loss = T.abs_((test_prediction > theano.shared(floatX(0.5))) - target_var).sum(axis=1)
    #zero_one_loss /= target_var.shape[1].astype(theano.config.floatX)
    #zero_one_loss = zero_one_loss.mean()
    
    # Compile a function performing a backward pass (training step)  on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    bwd_fn = theano.function([input_var, target_var], loss, updates=updates,)
    # Compile a second function performing a forward pass, 
    # returns validation loss, 0/1 Error, score i.e. Xout:
    fwd_fn = theano.function([input_var, target_var], test_loss)

    # Create a theano function for computing score
    score = lasagne.layers.get_output(network, deterministic=True)
    score_fn = theano.function([input_var], score)

    def compute_score(X, Y, batchsize=batchsize, shuffle=False):
        out = np.zeros(Y.shape)
        batch_id = 0
        for batch in iterate_minibatches(X, Y, batchsize, shuffle=False):
            inputs, _ = batch
            # Flip random half of the batch
            flip_idx = np.random.choice(len(inputs),size=len(inputs)/2,replace=False)
            if len(flip_idx)>1:
                inputs[flip_idx] = inputs[flip_idx,:,:,::-1]
            # Substract mean image
            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) 
            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
            if len(inputs)==batchsize:
                out[batch_id*batchsize : (batch_id+1)*batchsize] = score_fn(inputs)
                batch_id += 1
            else:
                out[batch_id*batchsize : ] = score_fn(inputs)
                
        return out

    try:
        #  MAIN LOOP FOR EACH RUN    
        for seed in np.arange(reps)+init_seed:            
            # reset learning rate
            lr.set_value(lr_init)

            print '\nRUN', seed, '...'
            # Split train/val/test set
            indicies = np.arange(len(Y))
            Y_train_val, Y_test, idx_train_val, idx_test = train_test_split(
                Y, indicies, random_state=seed, train_size=float(2)/3)
            Y_train, Y_val, idx_train, idx_val = train_test_split(
                Y_train_val, idx_train_val, random_state=seed)
            
            print "Train/val/test set size:",len(idx_train),len(idx_val),len(idx_test)

            idx_aug_train = data_aug(idx_train, mode='aug', isMat='idx', N=N)
            Xaug_train = X[idx_aug_train]
            Yaug_train = data_aug(Y_train, mode='aug', isMat='Y', N=N)

            idx_aug_val = data_aug(idx_val, mode='aug', isMat='idx', N=N)
            Xaug_val = X[idx_aug_val]
            Yaug_val = data_aug(Y_val, mode='aug', isMat='Y', N=N)

            # Module 2 training set is composed of module 1 training and validation set 
            idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx', N=N)
            Xaug_train_val = X[idx_aug_train_val]
            Yaug_train_val = data_aug(Y_train_val, mode='aug', isMat='Y', N=N)

            # Test set
            X_test = X[idx_test]
            # Y_test is already returned in the first train_test_split

            print "Augmented train/val/test set size:",len(Xaug_train),len(Yaug_val), len(X_test)
            print "Augmented (X,Y) dtype:", Xaug_train.dtype, Yaug_val.dtype
            print "Processed Mean image:",MEAN_IMG.dtype,MEAN_IMG.shape

            if toy: # try to overfit a tiny subset of the data
                Xaug_train = Xaug_train[:batchsize*data_multi + batchsize/2]
                Yaug_train = Yaug_train[:batchsize*data_multi + batchsize/2]
                Xaug_val = Xaug_val[:batchsize + batchsize/2]
                Yaug_val = Yaug_val[:batchsize + batchsize/2]

            # Init by pre-trained weights, if any
            if len(pretrained_w_path)>0:
                layer_list = lasagne.layers.get_all_layers(network) # 22 layers
                if pretrained_w_path.endswith('pkl'): 
                # load reference_net
                # use case: weights initialized from pre-trained reference nets                
                    f = open(pretrained_w_path, 'r')
                    w_list = pickle.load(f) # list of 11 (W,b)-pairs
                    f.close()
                    
                    lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2]) 
                    # exclude (W,b) of fc8
                    # BIG NOTE: don't be confused, it's pure coincident that layer_list 
                    # and w_list have the same index here. The last element of layer_list are 
                    # [.., fc6, drop6, fc7, drop7, fc8], while w_list are 
                    # [..., W, b, W, b, W, b] which, eg w_list[-4] and w_list[-3] correspond to
                    # params that are associated with fc7 i.e. params that connect drop6 to fc7
                    
                    
                elif pretrained_w_path.endswith('npz'): 
                # load self-trained net 
                # use case: continue training from a snapshot model
                    with np.load(pretrained_w_path) as f: # NOTE: only load snapshot of the same `seed`
                        # w_list = [f['arr_%d' % i] for i in range(len(f.files))] 
                        w_list = [dict(f.items())['arr_%d' % i] for i in range(len(f.files))] # load from bkviz, one-time use
                    lasagne.layers.set_all_param_values(network, w_list)

                elif pretrained_w_path.endswith('/'): # init from 1 of the 30 snapshots
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' %seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: 
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))] 
                            lasagne.layers.set_all_param_values(network, w_list)

            # START MODULE 1
            module1_time = 0
            if do_module1:
                print 'MODULE 1' 
                training_history={}
                training_history['iter_training_loss'] = []
                training_history['iter_validation_loss'] = []
                training_history['training_loss'] = []
                training_history['validation_loss'] = []
                training_history['learning_rate'] = []
                
                # http://deeplearning.net/tutorial/gettingstarted.html#early-stopping
                # early-stopping parameters
                n_train_batches = Xaug_train.shape[0] / batchsize
                if Xaug_train.shape[0] % batchsize != 0:
                    n_train_batches += 1
                patience = patience  # look at this many examples regardless
                patience_increase = 2     # wait this much longer when a new best is found
                lr_patience_increase = 1.01
                improvement_threshold = 0.995  # a relative improvement of this much is
                                               # considered significant; a significant test
                                               # MIGHT be better
                validation_frequency = min(n_train_batches, patience/2)
                                              # go through this many
                                              # minibatches before checking the network
                                              # on the validation set; in this case we
                                              # check every epoch
                best_params = None
                epoch_validation_loss = 0 # indicates that valid_loss has not been computed yet
                best_validation_loss = np.inf
                best_iter = -1
                lr_iter = -1
                test_score = 0.
                start_time = time.time()
                done_looping = False
                epoch = 0
                
                # Finally, launch the training loop.
                print("Starting training...")
                # We iterate over epochs:
                print("\nEpoch\tTrain Loss\tValid Loss\tBest-ValLoss-and-Iter\tTime\tL.Rate")
                sys.setrecursionlimit(10000)

                try: # Early-stopping implementation
                    while (not done_looping) and (epoch<num_epochs):
                        # In each epoch, we do a full pass over the training data:
                        train_err = 0
                        train_batches = 0
                        start_time = time.time()
                        for batch in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True):
                            inputs, targets = batch
                            # Horizontal flip half of the images
                            bs = inputs.shape[0]
                            indices = np.random.choice(bs, bs / 2, replace=False)
                            inputs[indices] = inputs[indices, :, :, ::-1]
                            
                            # Substract mean image
                            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) 
                            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
                    
                            train_err_batch = bwd_fn(inputs, targets) 
                            train_err += train_err_batch            
                            train_batches += 1
                            
                            iter_now = epoch*n_train_batches + train_batches
                            training_history['iter_training_loss'].append(train_err_batch)
                            training_history['iter_validation_loss'].append(epoch_validation_loss)
                            
                            if (iter_now+1) % validation_frequency == 0:
                                # a full pass over the validation data:       
                                val_err = 0
                                #zero_one_err = 0
                                val_batches = 0
                                for batch in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False):
                                    inputs, targets = batch
                                    # Substract mean image
                                    inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) 
                                    # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
                                    
                                    val_err_batch = fwd_fn(inputs, targets)
                                    val_err += val_err_batch
                                    val_batches += 1                
                                epoch_validation_loss = val_err / val_batches
                                if epoch_validation_loss < best_validation_loss:
                                    if epoch_validation_loss < best_validation_loss*improvement_threshold:
                                        patience = max(patience, iter_now * patience_increase)
                                        # lr_patience *= lr_patience_increase
                                        
                                    best_params = lasagne.layers.get_all_param_values(network)
                                    best_validation_loss = epoch_validation_loss
                                    best_iter = iter_now
                                    lr_iter = best_iter


                                else: # decay learning rate if optim=='momentum'
                                    if optim=='momentum' and (iter_now - lr_iter) >  lr_patience:
                                        lr.set_value(lr.get_value() * lr_decay) 
                                        lr_iter = iter_now
                            
                            if patience <= iter_now:
                                done_looping = True
                                break
                        
                        # Record training history
                        training_history['training_loss'].append(train_err / train_batches)
                        training_history['validation_loss'].append(epoch_validation_loss)
                        training_history['learning_rate'].append(lr.get_value())

                        epoch_time = time.time() - start_time
                        module1_time += epoch_time
                        # Then we print the results for this epoch:
                        print("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{}\t{:.3f}\t{}".format(
                                epoch+1, 
                                training_history['training_loss'][-1],
                                training_history['validation_loss'][-1],
                                best_validation_loss,
                                best_iter+1,
                                epoch_time,
                                training_history['learning_rate'][-1]
                            ))
                        
                        if (epoch+1)%snapshot==0: # TODO try to save weights at best_iter
                            snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1)
                            try: # use case: terminate experiment before reaching `reps`
                                np.savez(snapshot_path_string+'.npz', *best_params)
                                np.savez(snapshot_path_string+'_history.npz', training_history)
                                plot_loss(training_history, snapshot_path_string+'_loss.png')
                                # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], 
                                #     snapshot_path_string+'_conv1weights_')
                            except (KeyboardInterrupt, TypeError):
                                print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1)
                                pass

                        epoch += 1

                except (KeyboardInterrupt, MemoryError): # stop module 1 training on interrupt or OOM
                    pass
                print 'Training finished or KeyboardInterrupt (Training is never finished, only abandoned)'
                
                module1_time_eff = module1_time / iter_now * best_iter 
                print('Total and Effective training time are {:.0f} and {:.0f}'.format(
                    module1_time, module1_time_eff))
                time_profiles['train_module1'].append(module1_time)
                time_profiles['train_module1_eff'].append(module1_time_eff)
                
                # Save model after num_epochs or KeyboardInterrupt
                if (epoch+1)%snapshot!=0: # to avoid duplicate save
                    snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1)
                    if not toy:
                        try: # use case: terminate experiment before reaching `reps`
                            print 'Saving model...'
                            np.savez(snapshot_path_string+'.npz', *best_params)
                            np.savez(snapshot_path_string+'_history.npz', training_history)
                            plot_loss(training_history, snapshot_path_string+'_loss.png')
                            # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], 
                            #     snapshot_path_string+'_conv1weights_')
                        except (KeyboardInterrupt, TypeError):
                            print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1)
                            pass
                # And load them again later on like this:
                #with np.load('../snapshot_models/23alex16042023213910.npz') as f:
                #    param_values = [f['arr_%d' % i] for i in range(len(f.files))] # or
                #    training_history = f['arr_0'].items()
                # lasagne.layers.set_all_param_values(network, param_values)                
            
            # END OF MODULE 1             
                
            # START MODULE 2
            print '\nMODULE 2' 
            if not do_module1:
                if pretrained_w_path.endswith('pkl'):
                    snapshot_name = str(num_classes)+'alexOTS' # short for "off-the-shelf init"
                
                elif pretrained_w_path.endswith('npz'): # Resume from a SINGLE snapshot
                    # extract name pattern, e.g. '23alex16042023213910full10' 
                    # from string '../snapshot_models/23alex16042023213910full10_100.npz'
                    import re
                    regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+"
                    match = re.search(regex, pretrained_w_path)
                    snapshot_name = match.group(0)
                
                elif pretrained_w_path.endswith('/'): # RESUMED FROM TRAINED MODULE 1 (ONE-TIME USE)
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' %seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: 
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))] 
                            lasagne.layers.set_all_param_values(network, w_list)

            else: # MAIN BRANCH - assume do_module1 is True AND have run `snapshot` epochs
                if (epoch+1)>snapshot: 
                    with np.load(snapshot_path_string+'.npz') as f: # reload the best params for module 1 
                        w_list = [f['arr_%d' % i] for i in range(len(f.files))] 
                    lasagne.layers.set_all_param_values(network, w_list)
           
            score_train = compute_score(Xaug_train_val, Yaug_train_val)
            start_time = time.time()

            if load_t: # Server failed at the wrong time. We only have t backed-up
                if pretrained_w_path.endswith('/'):
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' %seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            t_train = np.load(osp.join('t','{0}.npy'.format(snapshot_name)))

            else: # MAIN BRANCH
                thresholds = Threshold(score_train, Yaug_train_val)
                thresholds.find_t_for() # determine t_train for each score_train. It will take a while
                t_train = np.asarray(thresholds.t)
                print 't_train is in ', t_train.min(), '..', t_train.max() 
                # `thresholds` holds t_train vector in .t attribute
                print('t_train produced in {:.3f}s'.format(time.time()-start_time))
                np.save('t/'+snapshot_name+str(seed)+'.npy', t_train)

            
            # Predictive model for t
            regr = linear_model.RidgeCV(cv=5) 
            # ridge regression: linear least squares with L2 regularization, alpha chosen by 5-fold CV
            regr.fit(score_train, t_train) 

            time_profiles['train_module2'].append(time.time()-start_time)
            # END OF MODULE 2        

            # TESTING PHASE
            start_time = time.time()
            score_test = compute_score(X_test, Y_test)
            t_test = regr.predict(score_test)
            print 'original t_test is in ', min(t_test), '..', max(t_test)
            t_test[t_test>1] = max(t_test[t_test<1])
            t_test[t_test<0] = min(t_test[t_test>0]) # ! Keep t_test in [0,1]
            print 'corrected t_test is in ', min(t_test), '..', max(t_test) 
            
            # Predict label 
            metrics = predict_label(score_test, Y_test, t_test, seed, num_classes, verbose=1)        
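            # Illustration only (an assumption about what predict_label does, not its actual
            # implementation): with one threshold per test example, multi-label predictions
            # could be obtained as
            #     Y_pred = (score_test >= t_test[:, np.newaxis]).astype(int)
            # and the returned `metrics` compare Y_pred against the ground truth Y_test.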
            time_profiles['test'].append(time.time()-start_time)

            all_metrics.append(metrics)
def run(X_train, y_train, X_val, y_val, X_test, y_test):
    import pickle
    import cProfile
    kron_params = [{
        'param_density': p
    } for p in np.linspace(0.0, 0.0, 1, endpoint=False)]
    num_epochs = 5

    batch_size = 100

    hidden_units = [100**2]

    trains, accs = list(
        zip(*([
            generate_train_acc(
                widths=hidden_units, type="old_kron", params=kron_param)
            for kron_param in kron_params
        ])))

    names = ["old_kron({})".format(p.values()) for p in kron_params]
    results = {}

    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    # optional: profile the training loop
    pr = cProfile.Profile()
    pr.enable()
    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch = res["train_fun"](inputs,
                                                                    targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs,
                time.time() - start_time))

            print("  training loss (in-iteration):\t\t{:.6f}".format(
                train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(train_acc /
                                                         train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)
    # optional: stop profiling and print the stats
    pr.disable()
    pr.print_stats(sort='cumtime')
    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
def train(n_channels=3,
          resolution=32,
          z_dim=128,
          n_labels=0,
          lr=1e-3,
          e_drift=1e-3,
          wgp_target=750,
          initial_resolution=4,
          total_kimg=25000,
          training_kimg=500,
          transition_kimg=500,
          iters_per_checkpoint=500,
          n_checkpoint_images=16,
          glob_str='cifar10',
          out_dir='cifar10'):

    # instantiate logger
    logger = SummaryWriter(out_dir)

    # load data
    batch_size = MINIBATCH_OVERWRITES[0]
    train_iterator = iterate_minibatches(glob_str, batch_size, resolution)

    # build models
    G = Generator(n_channels, resolution, z_dim, n_labels)
    D = Discriminator(n_channels, resolution, n_labels)

    G_train, D_train = GAN(G, D, z_dim, n_labels, resolution, n_channels)

    D_opt = Adam(lr=lr, beta_1=0.0, beta_2=0.99, epsilon=1e-8)
    G_opt = Adam(lr=lr, beta_1=0.0, beta_2=0.99, epsilon=1e-8)

    # define loss functions
    D_loss = [loss_mean, loss_gradient_penalty, 'mse']
    G_loss = [loss_wasserstein]

    # compile graphs used during training
    G.compile(G_opt, loss=loss_wasserstein)
    D.trainable = False
    G_train.compile(G_opt, loss=G_loss)
    D.trainable = True
    D_train.compile(D_opt, loss=D_loss, loss_weights=[1, GP_WEIGHT, e_drift])

    # for computing the loss
    ones = np.ones((batch_size, 1), dtype=np.float32)
    zeros = ones * 0.0

    # fix a z vector for training evaluation
    z_fixed = np.random.normal(0, 1, size=(n_checkpoint_images, z_dim))

    # vars
    resolution_log2 = int(np.log2(resolution))
    starting_block = resolution_log2
    starting_block -= np.floor(np.log2(initial_resolution))
    cur_block = starting_block
    cur_nimg = 0

    # compute duration of each phase and use proxy to update minibatch size
    phase_kdur = training_kimg + transition_kimg
    phase_idx_prev = 0

    # offset variable for transitioning between blocks
    offset = 0
    i = 0
    while cur_nimg < total_kimg * 1000:
        # block processing
        kimg = cur_nimg / 1000.0
        phase_idx = int(np.floor((kimg + transition_kimg) / phase_kdur))
        phase_idx = max(phase_idx, 0.0)
        phase_kimg = phase_idx * phase_kdur

        # update batch size and ones vector if we switched phases
        if phase_idx_prev < phase_idx:
            batch_size = MINIBATCH_OVERWRITES[phase_idx]
            train_iterator = iterate_minibatches(glob_str, batch_size)
            ones = np.ones((batch_size, 1), dtype=np.float32)
            zeros = ones * 0.0
            phase_idx_prev = phase_idx

        # possibly gradually update current level of detail
        if transition_kimg > 0 and phase_idx > 0:
            offset = (kimg + transition_kimg - phase_kimg) / transition_kimg
            offset = min(offset, 1.0)
            offset = offset + phase_idx - 1
            cur_block = max(starting_block - offset, 0.0)
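            # cur_block shrinks from starting_block toward 0 as kimg grows, which is what
            # progressively fades higher-resolution blocks of G and D into training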

        # update level of detail
        K.set_value(G_train.cur_block, np.float32(cur_block))
        K.set_value(D_train.cur_block, np.float32(cur_block))

        # train D
        for j in range(N_CRITIC_ITERS):
            z = np.random.normal(0, 1, size=(batch_size, z_dim))
            real_batch = next(train_iterator)
            fake_batch = G.predict_on_batch([z])
            interpolated_batch = get_interpolated_images(
                real_batch, fake_batch)
            losses_d = D_train.train_on_batch(
                [real_batch, fake_batch, interpolated_batch],
                [ones, ones * wgp_target, zeros])
            cur_nimg += batch_size

        # train G
        z = np.random.normal(0, 1, size=(batch_size, z_dim))
        loss_g = G_train.train_on_batch(z, -1 * ones)

        logger.add_scalar("cur_block", cur_block, i)
        logger.add_scalar("learning_rate", lr, i)
        logger.add_scalar("batch_size", z.shape[0], i)
        print("iter", i, "cur_block", cur_block, "lr", lr, "kimg", kimg,
              "losses_d", losses_d, "loss_g", loss_g)
        if (i % iters_per_checkpoint) == 0:
            G.trainable = False
            fake_images = G.predict(z_fixed)
            # log fake images
            log_images(fake_images, 'fake', i, logger, fake_images.shape[1],
                       fake_images.shape[2], int(np.sqrt(n_checkpoint_images)))

            # plot real images for reference
            log_images(real_batch[:n_checkpoint_images], 'real', i, logger,
                       real_batch.shape[1], real_batch.shape[2],
                       int(np.sqrt(n_checkpoint_images)))

            # save the model to eventually resume training or do inference
            save_model(G, out_dir + "/model.json", out_dir + "/model.h5")

        log_losses(losses_d, loss_g, i, logger)
        i += 1
Example #35
    def train(self):
        """Trains a model."""

        steps = 0

        # =========
        # evaluate on development set
        val_aer, val_acc, val_loss = self.model.evaluate(
            self.dev_corpus, self.dev_wa, batch_size=self.batch_size)

        # print Epoch loss
        print("Epoch {} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}".format(
            0, val_aer, val_acc, val_loss))
        #========

        for epoch_id in range(1, self.num_epochs + 1):

            # shuffle data set every epoch
            print("Shuffling training data")
            random.shuffle(self.corpus)

            loss = 0.0
            accuracy_correct = 0
            accuracy_total = 0
            epoch_steps = 0

            for batch_id, batch in enumerate(
                    iterate_minibatches(self.corpus,
                                        batch_size=self.batch_size), 1):

                # Dynamic learning rate, cf. Bottou (2012), Stochastic gradient descent tricks.
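                # i.e. lr_t = lr_0 / (1 + lr_0 * decay * t), a hyperbolic decay
                # in the number of SGD steps taken so far.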
                lr_t = self.lr * (1 + self.lr * self.lr_decay * steps)**-1

                x, y = prepare_data(batch, self.model.x_vocabulary,
                                    self.model.y_vocabulary)

                # If you want to see the data that goes into the model during training
                # you may uncomment this.
                #if batch_id % 1000 == 0:
                #    print(" ".join([str(t) for t in x[0]]))
                #    print(" ".join([str(t) for t in y[0]]))
                #    print(" ".join([self.model.x_vocabulary.get_token(t) for t in x[0]]))
                #    print(" ".join([self.model.y_vocabulary.get_token(t) for t in y[0]]))

                # input to the TF graph
                feed_dict = {
                    self.lr_ph: lr_t,
                    self.model.x: x,
                    self.model.y: y,
                    self.model.is_training: True
                }

                # things we want TF to return to us from the computation
                fetches = {
                    "optimizer": self.optimizer,
                    "loss": self.model.loss,
                    "acc_correct": self.model.accuracy_correct,
                    "acc_total": self.model.accuracy_total,
                    "pa_x": self.model.pa_x,
                    "py_xa": self.model.py_xa,
                    "py_x": self.model.py_x,
                    "KL": self.model.KL
                    # "a"           : self.model.a,
                    # "b"           : self.model.b,
                    # "alpha"       : self.model.alpha,
                    # "beta"        : self.model.beta
                }

                res = self.session.run(fetches, feed_dict=feed_dict)

                loss += res["loss"]
                accuracy_correct += res["acc_correct"]
                accuracy_total += res["acc_total"]
                batch_accuracy = res["acc_correct"] / float(res["acc_total"])
                steps += 1
                epoch_steps += 1

                if batch_id % 100 == 0:
                    # print(res["KL"])
                    # print(res["a"])
                    # print(res["b"])
                    # print(res["alpha"])
                    # print(res["beta"])
                    print("Iter {:5d} loss {:6f} accuracy {:1.2f} lr {:1.6f}".
                          format(batch_id, res["loss"], batch_accuracy, lr_t))

            # evaluate on development set
            val_aer, val_acc, val_loss = self.model.evaluate(
                self.dev_corpus,
                self.dev_wa,
                batch_size=self.batch_size,
                training=True)

            # print Epoch loss
            print(
                "Train=true: Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}"
                .format(epoch_id, loss / float(epoch_steps),
                        accuracy_correct / float(accuracy_total), val_aer,
                        val_acc, val_loss))

            val_aer, val_acc, val_loss = self.model.evaluate(
                self.dev_corpus, self.dev_wa, batch_size=self.batch_size)

            # print Epoch loss
            print(
                "Train=False: Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}"
                .format(epoch_id, loss / float(epoch_steps),
                        accuracy_correct / float(accuracy_total), val_aer,
                        val_acc, val_loss))

            # save parameters
            save_path = self.model.save(self.session, path="model.ckpt")
            print("Model saved in file: %s" % save_path)
Example #36
  def train(self):
    """Trains a model."""
    
    steps = 0

    for epoch_id in range(1, self.num_epochs + 1):
      
      # shuffle data set every epoch
      print("Shuffling training data")
      random.shuffle(self.corpus)
      
      loss = 0.0
      accuracy_correct = 0
      accuracy_total = 0
      epoch_steps = 0

      for batch_id, batch in enumerate(iterate_minibatches(
          self.corpus, batch_size=self.batch_size), 1):
        
        # Dynamic learning rate, cf. Bottou (2012), Stochastic gradient descent tricks.
        lr_t = self.lr * (1 + self.lr * self.lr_decay * steps)**-1
        
        x, y = prepare_data(batch, self.model.x_vocabulary, 
                            self.model.y_vocabulary)
        
        # If you want to see the data that goes into the model during training
        # you may uncomment this.
        #if batch_id % 1000 == 0:
        #    print(" ".join([str(t) for t in x[0]]))
        #    print(" ".join([str(t) for t in y[0]]))
        #    print(" ".join([self.model.x_vocabulary.get_token(t) for t in x[0]]))
        #    print(" ".join([self.model.y_vocabulary.get_token(t) for t in y[0]]))

        # input to the TF graph
        feed_dict = { 
          self.lr_ph : lr_t,
          self.model.x : x, 
          self.model.y : y
        }
        
        # things we want TF to return to us from the computation
        fetches = {
          "optimizer"   : self.optimizer,
          "loss"        : self.model.loss,
          "acc_correct" : self.model.accuracy_correct,
          "acc_total"   : self.model.accuracy_total,
          "pa_x"        : self.model.pa_x,
          "py_xa"       : self.model.py_xa,
          "py_x"        : self.model.py_x
        }
        res = self.session.run(fetches, feed_dict=feed_dict)
        loss += res["loss"]
        accuracy_correct += res["acc_correct"]
        accuracy_total += res["acc_total"]
        batch_accuracy = res["acc_correct"] / float(res["acc_total"])
        steps += 1
        epoch_steps += 1
        
        if batch_id % 100 == 0:
          print("Iter {:5d} loss {:6f} accuracy {:1.2f} lr {:1.6f}".format(
            batch_id, res["loss"], batch_accuracy, lr_t))
        
        if batch_id % 5000 == 0:  # break after 5000 batches to keep computation time down
          break

      # evaluate on development set
      val_aer, val_acc = self.model.evaluate(self.dev_corpus, self.dev_wa)
      self.Aer.append(val_aer)
      self.Loss.append(loss)
      
      # print Epoch loss    
      print("Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f}".format(
          epoch_id, 
          loss / float(epoch_steps), 
          accuracy_correct / float(accuracy_total),
          val_aer, val_acc))
        
      # save parameters
      save_path = self.model.save(self.session, path="D:/Roderick/Documents/Master/5 NLP2/project three/project_neuralibm/model.ckpt")
      print("Model saved in file: %s" % save_path)

    plt.figure()
    plt.title('AER')
    plt.plot(self.Aer)
    print("AER", self.Aer)
    
    plt.figure()
    plt.title('Loss')
    plt.plot(self.Loss)
    print("Loss", self.Loss)
    plt.show()  # display both figures when the script is run non-interactively