def evaluate(self, data, ref_alignments, batch_size=4, training=False):
    """Evaluate the model on a data set."""
    ref_align = read_naacl_alignments(ref_alignments)
    ref_iterator = iter(ref_align)
    metric = AERSufficientStatistics()
    accuracy_correct = 0
    accuracy_total = 0
    loss_total = 0
    steps = 0.

    for batch_id, batch in enumerate(iterate_minibatches(data, batch_size=batch_size)):
        x, y = prepare_data(batch, self.x_vocabulary, self.y_vocabulary)
        y_len = np.sum(np.sign(y), axis=1, dtype="int64")

        align, prob, acc_correct, acc_total, loss = self.get_viterbi(x, y, training)
        accuracy_correct += acc_correct
        accuracy_total += acc_total
        loss_total += loss
        steps += 1

        for alignment, N, (sure, probable) in zip(align, y_len, ref_iterator):
            # the evaluation ignores NULL links, so we discard them
            # j is 1-based in the naacl format
            pred = set((aj, j) for j, aj in enumerate(alignment[:N], 1) if aj > 0)
            metric.update(sure=sure, probable=probable, predicted=pred)
            # print(batch[s])
            # print(alignment[:N])
            # print(pred)
            # s += 1

    accuracy = accuracy_correct / float(accuracy_total)
    return metric.aer(), accuracy, loss_total / float(steps)
def train_regressor(model, iters=2000, batchsize=100, resample=False, optimizer=None, log_likelihood=gaussian_log_likelihood):
    X = (model.X - model.mx) * model.iSx
    Y = (model.Y - model.my) * model.iSy
    N = X.shape[0]
    M = batchsize
    if optimizer is None:
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params, 1e-3)

    pbar = tqdm.tqdm(enumerate(iterate_minibatches(X, Y, M)), total=iters)
    for i, batch in pbar:
        x, y = batch
        model.zero_grad()
        outs = model(x, normalize=False, resample=resample)
        Enlml = -log_likelihood(y, *outs).mean()
        loss = Enlml + model.regularization_loss() / N
        loss.backward()
        optimizer.step()
        pbar.set_description('log-likelihood of data: %f' % (-Enlml))
        if i == iters:
            pbar.close()
            break
def predict_label(words, masks, chars, predict_fn, alphabet_label):
    predict_list = []
    for batch in utils.iterate_minibatches(words, masks=masks, char_inputs=chars):
        word_inputs, mask_inputs, char_inputs = batch
        predicts = predict_fn(word_inputs, mask_inputs, char_inputs)
        predict_list += utils.output_predictions(predicts, mask_inputs, alphabet_label)
    return predict_list
def train(self, paths):
    assert self.sess is not None
    obs = numpy.concatenate([path['observations'] for path in paths])
    returns = numpy.concatenate([path['returns'] for path in paths])
    if self.batch_size is not None and obs.shape[0] >= self.batch_size:
        for x, z in iterate_minibatches([obs, returns], self.batch_size, shuffle=True):
            self.sess.run(self.train_op, feed_dict={self.x: x, self.z: z})
    else:
        self.sess.run(self.train_op, feed_dict={self.x: obs, self.z: returns})
def training(self, source, num_epochs=50, logger=None):
    """Training procedure. Used to train a multiple output network."""
    if logger is None:
        logger = new_logger()
    logger.info("Starting training...")
    final_stats = {
        'source training loss': [],
        'source training acc': [],
        'source valid loss': [],
        'source valid acc': [],
    }
    for epoch in range(num_epochs):
        start_time = time.time()
        stats = {key: [] for key in final_stats.keys()}

        # Training (forward and backward propagation)
        source_batches = iterate_minibatches(source['X_train'], source['y_train'], source['batchsize'], shuffle=True)
        for source_batch in source_batches:
            X, y = source_batch
            loss, acc = self.train_label(X, y)
            stats['source training loss'].append(loss)
            stats['source training acc'].append(acc * 100)

        # Validation (forward propagation)
        source_batches = iterate_minibatches(source['X_val'], source['y_val'], source['batchsize'])
        for source_batch in source_batches:
            X, y = source_batch
            loss, acc = self.valid_label(X, y)
            stats['source valid loss'].append(loss)
            stats['source valid acc'].append(acc * 100)

        logger.info("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        for stat_name, stat_value in sorted(stats.items()):
            if stat_value:
                mean_value = np.mean(stat_value)
                logger.info('  {:30} : {:.6f}'.format(stat_name, mean_value))
                final_stats[stat_name].append(mean_value)
    return final_stats
def test(test_data, test_labels, batch_size, model, test_batch_num):
    accuracy = 0.0
    keep_probs_values = [1.0 for i in range(len(model.keep_probs_values))]
    for batch in utils.iterate_minibatches(inputs=test_data, targets=test_labels, batchsize=batch_size):
        test_in, test_target = batch
        # test_in = test_in[:, np.newaxis, :, np.newaxis]
        # print model.sess.run(tf.reduce_sum(tf.equal(tf.argmax(model.output_layer, 1), tf.argmax(model.y, 1))),
        #                      feed_dict={model.x: test_in, model.y: test_target})
        accuracy += model.sess.run(
            tf.reduce_mean(tf.cast(tf.equal(tf.argmax(model.output_layer, 1), tf.argmax(model.y, 1)), tf.float32)),
            feed_dict={model.x: test_in, model.y: test_target, model.keep_probs: keep_probs_values})
    print('accuracy: {}'.format(accuracy / test_batch_num))
    return accuracy / test_batch_num
def _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc):
    # a full pass over the validation data:
    val_err = 0.0
    val_acc = 0.0
    val_batches = 0
    for batch in utils.iterate_minibatches(X_val, next_problem_val, truth_val, batchsize, shuffle=False):
        X_, next_problem_, truth_ = batch
        err, acc = compute_cost_acc(X_, next_problem_, truth_)
        val_err += err
        val_acc += acc
        val_batches += 1
    val_loss = val_err / val_batches
    val_acc = val_acc / val_batches * 100
    return val_loss, val_acc
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_steps=10):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_a, emb_b = val_data[1]
    txts = val_data[2]

    # add batch dimension
    emb_a, emb_b = emb_a[None, :], emb_b[None, :]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(1, z_dim))

    G.trainable = False
    # predict using embeddings a and b
    fake_image_a = G.predict([z, emb_a])[0]
    fake_image_b = G.predict([z, emb_b])[0]

    # add and subtract
    emb_add = (emb_a + emb_b)
    emb_a_sub_b = (emb_a - emb_b)
    emb_b_sub_a = (emb_b - emb_a)

    # generate images
    fake_a = G.predict([z, emb_a])[0]
    fake_b = G.predict([z, emb_b])[0]
    fake_add = G.predict([z, emb_add])[0]
    fake_a_sub_b = G.predict([z, emb_a_sub_b])[0]
    fake_b_sub_a = G.predict([z, emb_b_sub_a])[0]

    fake_a = ((fake_a + 1) * 0.5)
    fake_b = ((fake_b + 1) * 0.5)
    fake_add = ((fake_add + 1) * 0.5)
    fake_a_sub_b = ((fake_a_sub_b + 1) * 0.5)
    fake_b_sub_a = ((fake_b_sub_a + 1) * 0.5)

    plt.imsave("{}/fake_text_arithmetic_a".format(out_dir), fake_a)
    plt.imsave("{}/fake_text_arithmetic_b".format(out_dir), fake_b)
    plt.imsave("{}/fake_text_arithmetic_add".format(out_dir), fake_add)
    plt.imsave("{}/fake_text_arithmetic_a_sub_b".format(out_dir), fake_a_sub_b)
    plt.imsave("{}/fake_text_arithmetic_b_sub_a".format(out_dir), fake_b_sub_a)

    print(str(txts[0]), str(txts[1]), file=open("{}/fake_text_arithmetic.txt".format(out_dir), "a"))
def calc_validation_loss(sess, loss, accuracy, input_seq, ouput_seq, X_val, y_val):
    '''Calculate validation loss on the entire validation set.'''
    val_accuracy, val_loss, val_batches = 0., 0., 0
    batch_size = min(config.val_batch_size, X_val.shape[0])
    for (inputs, targets) in utils.iterate_minibatches(X_val, y_val, batchsize=batch_size):
        batch_loss, batch_accuracy = sess.run([loss, accuracy], feed_dict={input_seq: inputs, ouput_seq: targets})
        val_batches += 1
        val_loss += batch_loss
        val_accuracy += batch_accuracy
    val_loss /= val_batches
    val_accuracy /= val_batches
    return val_loss, val_accuracy
def check_accuracy(data, compute_cost_acc, dataset_name='test', batchsize=32):
    X_test, next_problem_test, truth_test = data
    print("Testing...")
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in utils.iterate_minibatches(X_test, next_problem_test, truth_test, batchsize, shuffle=False):
        X_, next_problem_, truth_ = batch
        err, acc = compute_cost_acc(X_, next_problem_, truth_)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  {} loss:\t\t\t{:.6f}".format(dataset_name, test_err / test_batches))
    print("  {} accuracy:\t\t{:.2f} %".format(dataset_name, test_acc / test_batches * 100))
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_samples=5):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, n_samples))
    emb, txts = val_data[1], val_data[2]

    # sample z vector for inference
    z = np.random.uniform(-1, 1, size=(n_samples, z_dim))

    G.trainable = False
    fake_images = G.predict([z, emb])
    for i in range(n_samples):
        img = ((fake_images[i] + 1) * 0.5)
        plt.imsave("{}/fake_{}".format(out_dir, i), img)
        print(i, str(txts[i]).strip(), file=open("{}/fake_text.txt".format(out_dir), "a"))
def _run_epoch(self, X, y, batchsize, training=False):
    """Take a pair of input data and labels, split them into minibatches
    and pass them through the network. If training (training=True),
    the parameters of the network are updated.

    Args:
        X (ndarray): Input data
        y (ndarray): Labels
        batchsize (int): Size of the desired minibatches
        training (bool, optional): If True, the network parameters are
            updated with stochastic gradient descent after each iteration.

    Returns:
        (float, float): Average error and average accuracy.
            When training, only the error is returned (accuracy is None).
    """
    err = 0
    acc = 0
    batches = 0
    for batch in tqdm(iterate_minibatches(X, y, batchsize, shuffle=training), total=len(X) / batchsize):
        inputs, targets = batch
        inputs = np.asarray(inputs)
        targets = np.asarray(targets)
        if training:
            err += self._train_fn(inputs, targets)
        else:
            verr, vacc = self._val_fn(inputs, targets)
            err += verr
            acc += vacc
        batches += 1
    if training:
        return (err / batches, None)
    else:
        return (err / batches, (acc / batches) * 100)
def compute_feature(X, Y, batchsize=batchsize, shuffle=False):
    out = np.zeros((len(Y), 4096))
    batch_id = 0
    for batch in iterate_minibatches(X, Y, batchsize, shuffle=False):
        inputs, _ = batch
        # Flip a random half of the batch horizontally
        flip_idx = np.random.choice(len(inputs), size=len(inputs)/2, replace=False)
        if len(flip_idx) > 1:
            inputs[flip_idx] = inputs[flip_idx, :, :, ::-1]
        # Subtract the mean image
        # (MEAN_IMG is broadcast the numpy way; take note if you want a theano expression instead)
        inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)
        if len(inputs) == batchsize:
            out[batch_id*batchsize: (batch_id+1)*batchsize] = feat_fn(inputs)
            batch_id += 1
        else:
            out[batch_id*batchsize:] = feat_fn(inputs)
    return out
def train(train_data, val_data, train_acc_fn, compute_cost_acc, num_epochs=5, batchsize=32):
    X_train, next_problem_train, truth_train = train_data
    X_val, next_problem_val, truth_val = val_data

    print("Starting training...")
    # We iterate over epochs:
    train_accuracies = []
    val_accuracies = []
    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0.0
        train_acc = 0.0
        train_batches = 0
        start_time = time.time()
        for batch in utils.iterate_minibatches(X_train, next_problem_train, truth_train, batchsize, shuffle=False):
            X_, next_problem_, truth_ = batch
            err, acc = train_acc_fn(X_, next_problem_, truth_)
            train_err += err
            train_acc += acc
            train_batches += 1
            val_loss, val_acc = _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc)
            print("  Epoch {} \tbatch {} \tloss {} \ttrain acc {:.2f} \tval acc {:.2f}".format(
                epoch, train_batches, err, acc * 100, val_acc))

        train_acc = train_acc / train_batches * 100
        train_accuracies.append(train_acc)
        train_loss = train_err / train_batches
        train_losses.append(train_loss)

        val_loss, val_acc = _check_val_loss_acc(X_val, next_problem_val, truth_val, batchsize, compute_cost_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_loss))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc))
        print("  validation loss:\t\t{:.6f}".format(val_loss))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc))

    print("Training completed.")
    return train_losses, train_accuracies, val_accuracies
def optimize_policy(self, sess, samples, logger=None, **args):
    obs = samples['observations']
    actions = samples['actions']
    advantages = samples['advantages']
    dist_vars = [samples['infos'][k] for k in self.dist.keys()]
    inputs = [obs, actions, advantages] + dist_vars
    feed_dict = dict(list(zip(self.inputs_tensors, inputs)))
    if self.batch_size is not None and obs.shape[0] >= self.batch_size:
        for vs in iterate_minibatches(inputs, self.batch_size, shuffle=True):
            sess.run(self.train_op, feed_dict=dict(list(zip(self.inputs_tensors, vs))))
    else:
        sess.run(self.train_op, feed_dict=feed_dict)
    if logger:
        summary_str = sess.run(self.summary_op, feed_dict=feed_dict)
        logger.add_summary(summary_str)
def infer(data_filepath='data/flowers.hdf5', z_dim=128, out_dir='gan', n_steps=10):
    G = load_model(out_dir)
    val_data = get_data(data_filepath, 'train')
    val_data = next(iterate_minibatches(val_data, 2))
    emb_source, emb_target = val_data[1]
    txts = val_data[2]

    z = np.random.uniform(-1, 1, size=(1, z_dim))
    G.trainable = False
    for i in range(n_steps + 1):
        p = i / float(n_steps)
        emb = emb_source * (1 - p) + emb_target * p
        emb = emb[None, :]
        fake_image = G.predict([z, emb])[0]
        img = ((fake_image + 1) * 0.5)
        plt.imsave("{}/fake_text_interpolation_i{}".format(out_dir, i), img)
        print(i, str(txts[int(round(p))]).strip(), file=open("{}/fake_text_interpolation.txt".format(out_dir), "a"))
def train(noise_dim, gen_lr, disc_lr, batch_size, num_epochs, save_every, tensorboard_vis):
    """Trains the Deep Convolutional Generative Adversarial Network (DCGAN).

    See https://arxiv.org/abs/1511.06434 for more details.

    Args: optional arguments [python train.py --help]
    """
    # Load dataset.
    logging.info('loading LFW dataset into memory')
    X, IMAGE_SHAPE = load_dataset(dimx=36, dimy=36)

    tf.reset_default_graph()
    try:
        if not tf.test.is_gpu_available(cuda_only=True):
            raise Exception
    except Exception:
        logging.critical('CUDA capable GPU device not found.')
        exit(0)

    logging.warn('constructing graph on GPU')
    with tf.device('/gpu:0'):
        # Define placeholders for input data.
        noise = tf.placeholder('float32', [None, noise_dim])
        real_data = tf.placeholder('float32', [None, ] + list(IMAGE_SHAPE))

        # Create Generator and Discriminator models.
        logging.debug('creating generator and discriminator')
        g_out = generator(noise, train=True)
        d_probs, d_fake_logits = discriminator(g_out, train=True)
        d_probs2, d_real_logits = discriminator(real_data, train=True)

        logging.debug('defining training ops')
        # Define Generator (G) ops.
        g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_fake_logits, labels=tf.ones_like(d_fake_logits)))
        g_optimizer = tf.train.AdamOptimizer(learning_rate=gen_lr)
        g_vars = get_vars_by_scope('generator')
        g_train_step = g_optimizer.minimize(g_loss, var_list=g_vars)

        # Define Discriminator (D) ops.
        d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_real_logits, labels=tf.ones_like(d_real_logits)))
        d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_fake_logits, labels=tf.zeros_like(d_real_logits)))
        d_loss = d_loss_real + d_loss_fake
        d_optimizer = tf.train.AdamOptimizer(learning_rate=disc_lr)
        d_vars = get_vars_by_scope('discriminator')
        d_train_step = d_optimizer.minimize(d_loss, var_list=d_vars)

    with tf.Session() as sess:
        # Init vars.
        sess.run(tf.global_variables_initializer())

        # Start training.
        logging.debug('training DCGAN model')
        for epoch in range(num_epochs):
            eval_noise = sample_noise_batch(16)
            idx = np.random.choice(range(X.shape[0]), size=16)
            eval_real_data = X[idx]

            for X_batch in tqdm(iterate_minibatches(X, batch_size, shuffle=True),
                                total=X.shape[0] // batch_size,
                                desc='Epoch[{}/{}]'.format(epoch + 1, num_epochs),
                                leave=False):
                sess.run([d_train_step],
                         feed_dict={real_data: X_batch, noise: sample_noise_batch(batch_size)})
                for _ in range(2):
                    sess.run([g_train_step], feed_dict={noise: sample_noise_batch(batch_size)})

            # Evaluate the model after every epoch.
            d_loss_iter, g_loss_iter, eval_images = sess.run(
                [d_loss, g_loss, g_out],
                feed_dict={real_data: eval_real_data, noise: eval_noise})

            # Generate images using G and save in `out/`.
            tl.visualize.save_images(eval_images, [4, 4], 'out/eval_{}.png'.format(epoch + 1))

            logging.info('Epoch[{}/{}] g_loss: {:.6f} - d_loss: {:.6f}'.format(
                epoch + 1, num_epochs, g_loss_iter, d_loss_iter))
def comparison(X_train, y_train, X_val, y_val, X_test, y_test, kron_params=None):
    import pickle
    kron_params = [{'rank': p} for p in np.arange(2, 5, 1)] if kron_params is None else kron_params
    num_epochs = 5
    batch_size = 100
    hidden_units = [4 * 4]
    trains, accs = generate_train_acc(widths=hidden_units, type="dense")
    trains, accs = list(zip(*([(trains, accs)] +
                              [generate_train_acc(widths=hidden_units, type="kron", params=kron_param)
                               for kron_param in kron_params] +
                              [generate_train_acc(widths=hidden_units, type="uv_kron", params=kron_param)
                               for kron_param in kron_params])))
    names = ["dense"] + ["kron({})".format(p.values()) for p in kron_params] + \
            ["uv_kron({})".format(p.values()) for p in kron_params]

    results = {}
    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch = res["train_fun"](inputs, targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)

    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
def train_model(num_data, batch_size, learning_rate, patience, decay_rate,
                X_train, Y_train, mask_train, C_train,
                X_dev, Y_dev, mask_dev, C_dev,
                X_test, Y_test, mask_test, C_test,
                input_var, target_var, mask_var, char_input_var,
                model, model_name, label_alphabet, output_dir):
    num_tokens = mask_var.sum(dtype=theano.config.floatX)

    energies_train = lasagne.layers.get_output(model)
    energies_eval = lasagne.layers.get_output(model, deterministic=True)

    loss_train = utils.crf_loss(energies_train, target_var, mask_var).mean()
    loss_eval = utils.crf_loss(energies_eval, target_var, mask_var).mean()

    _, corr_train = utils.crf_accuracy(energies_train, target_var)
    corr_train = (corr_train * mask_var).sum(dtype=theano.config.floatX)
    prediction_eval, corr_eval = utils.crf_accuracy(energies_eval, target_var)
    corr_eval = (corr_eval * mask_var).sum(dtype=theano.config.floatX)

    params = lasagne.layers.get_all_params(model, trainable=True)
    updates = lasagne.updates.momentum(loss_train, params=params, learning_rate=learning_rate, momentum=0.9)

    train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                               [loss_train, corr_train, num_tokens], updates=updates)
    eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                              [loss_eval, corr_eval, num_tokens, prediction_eval])

    num_batches = num_data / batch_size
    num_epochs = 20
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        train_inst = 0
        start_time = time.time()
        num_back = 0
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
                                               batch_size=batch_size, shuffle=True):
            inputs, targets, masks, char_inputs = batch
            err, corr, num = train_fn(inputs, targets, masks, char_inputs)
            train_err += err * inputs.shape[0]
            train_corr += corr
            train_total += num
            train_inst += inputs.shape[0]
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave
            sys.stdout.write("\b" * num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_inst, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            num_back = len(log_info)

        # update training log after each epoch
        assert train_inst == num_data
        sys.stdout.write("\b" * num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / num_data, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        dev_inst = 0
        for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
            inputs, targets, masks, char_inputs = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
            dev_err += err * inputs.shape[0]
            dev_corr += corr
            dev_total += num
            dev_inst += inputs.shape[0]
            utils.output_predictions(predictions, targets, masks, output_dir + '/dev%d' % epoch,
                                     label_alphabet, is_flattened=False)

        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            dev_err / dev_inst, dev_corr, dev_total, dev_corr * 100 / dev_total))

        if model_name != 'pos':
            input = open(output_dir + '/dev%d' % epoch)
            p1 = subprocess.Popen(shlex.split("perl conlleval.pl"), stdin=input)
            p1.wait()

        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            test_inst = 0
            for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
                                                   batch_size=batch_size):
                inputs, targets, masks, char_inputs = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
                test_err += err * inputs.shape[0]
                test_corr += corr
                test_total += num
                test_inst += inputs.shape[0]
                utils.output_predictions(predictions, targets, masks, output_dir + '/test%d' % epoch,
                                         label_alphabet, is_flattened=False)

            np.savez('pre-trained-model/' + model_name + '/weights', *lasagne.layers.get_all_param_values(model))
            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
                test_err / test_inst, test_corr, test_total, test_corr * 100 / test_total))

            if model_name != 'pos':
                input = open(output_dir + '/test%d' % epoch)
                p1 = subprocess.Popen(shlex.split("perl conlleval.pl"), stdin=input)
                p1.wait()

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop if dev acc does not improve for `patience` epochs in a row
        if stop_count == patience:
            break

        # re-compile a function with the new learning rate for training
        lr = learning_rate / (1.0 + epoch * decay_rate)
        updates = lasagne.updates.momentum(loss_train, params=params, learning_rate=lr, momentum=0.9)
        train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                                   [loss_train, corr_train, num_tokens], updates=updates)

    # print the best performance on test data
    print("final best loss test performance (at epoch %d)" % best_epoch_loss)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_loss_test_err / test_inst, best_loss_test_corr, test_total,
        best_loss_test_corr * 100 / test_total))
    print("final best acc test performance (at epoch %d)" % best_epoch_acc)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_acc_test_err / test_inst, best_acc_test_corr, test_total,
        best_acc_test_corr * 100 / test_total))
def train(images, labels, fold, model_type, batch_size, num_epochs, subj_id=0, reuse_cnn=False,
          dropout_rate=dropout_rate, learning_rate_default=1e-3, Optimizer=tf.train.AdamOptimizer,
          log_path=log_path):
    """
    A sample training function which loops over the training set and evaluates the network
    on the validation set after each epoch.

    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: number of epochs of dataset to go over for training
    :param subj_id: the id of fold for storing log and the best model
    :param reuse_cnn: whether to train cnn first, and load its weight for multi-frame model
    :return: none
    """
    with tf.name_scope('Inputs'):
        input_var = tf.placeholder(tf.float32, [None, None, 32, 32, n_colors], name='X_inputs')
        target_var = tf.placeholder(tf.int64, [None], name='y_inputs')
        tf_is_training = tf.placeholder(tf.bool, None, name='is_training')

    num_classes = len(np.unique(labels))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(images, labels, fold)

    print('Train set label and proportion:\t', np.unique(y_train, return_counts=True))
    print('Val set label and proportion:\t', np.unique(y_val, return_counts=True))
    print('Test set label and proportion:\t', np.unique(y_test, return_counts=True))
    print('The shape of X_train:\t', X_train.shape)
    print('The shape of X_val:\t', X_val.shape)
    print('The shape of X_test:\t', X_test.shape)

    print("Building model and compiling functions...")
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var, num_classes, train=tf_is_training,
                                        dropout_rate=dropout_rate, name='CNN_Conv1d' + '_sbj' + str(subj_id))
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var, num_classes, 100, train=tf_is_training,
                                      dropout_rate=dropout_rate, name='CNN_LSTM' + '_sbj' + str(subj_id))
    elif model_type == 'mix':
        network = build_convpool_mix(input_var, num_classes, 100, train=tf_is_training,
                                     dropout_rate=dropout_rate, name='CNN_Mix' + '_sbj' + str(subj_id))
    elif model_type == 'cnn':
        with tf.name_scope(name='CNN_layer' + '_fold' + str(subj_id)):
            network = build_cnn(input_var)  # output shape [None, 4, 4, 128]
            convpool_flat = tf.reshape(network, [-1, 4 * 4 * 128])
            h_fc1_drop1 = tf.layers.dropout(convpool_flat, rate=dropout_rate, training=tf_is_training, name='dropout_1')
            h_fc1 = tf.layers.dense(h_fc1_drop1, 256, activation=tf.nn.relu, name='fc_relu_256')
            h_fc1_drop2 = tf.layers.dropout(h_fc1, rate=dropout_rate, training=tf_is_training, name='dropout_2')
            network = tf.layers.dense(h_fc1_drop2, num_classes, name='fc_softmax')
            # the loss function contains the softmax activation
    else:
        raise ValueError("Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    Train_vars = tf.trainable_variables()
    prediction = network

    with tf.name_scope('Loss'):
        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in Train_vars if 'kernel' in v.name])
        ce_loss = tf.losses.sparse_softmax_cross_entropy(labels=target_var, logits=prediction)
        _loss = ce_loss + weight_decay * l2_loss

    # decay_steps for learning rate decay; len(y_train)//batch_size is the number of training steps per epoch
    decay_steps = 3 * (len(y_train) // batch_size)
    with tf.name_scope('Optimizer'):
        # learning_rate = learning_rate_default * Decay_rate^(global_steps/decay_steps)
        global_steps = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate_default,  # Base learning rate.
            global_steps,
            decay_steps,
            0.95,  # Decay rate.
            staircase=True)
        optimizer = Optimizer(learning_rate)  # GradientDescentOptimizer or AdamOptimizer
        train_op = optimizer.minimize(_loss, global_step=global_steps, var_list=Train_vars)

    with tf.name_scope('Accuracy'):
        prediction = tf.argmax(prediction, axis=1)
        correct_prediction = tf.equal(prediction, target_var)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Output directory for models and summaries;
    # choose a different path for each model and subject
    out_dir = os.path.abspath(os.path.join(os.path.curdir, log_path, (model_type + '_' + str(subj_id))))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss, accuracy and learning_rate
    loss_summary = tf.summary.scalar('loss', _loss)
    acc_summary = tf.summary.scalar('train_acc', accuracy)
    lr_summary = tf.summary.scalar('learning_rate', learning_rate)

    # Train summaries
    train_summary_op = tf.summary.merge([loss_summary, acc_summary, lr_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, tf.get_default_graph())

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, tf.get_default_graph())

    # Test summaries
    test_summary_op = tf.summary.merge([loss_summary, acc_summary])
    test_summary_dir = os.path.join(out_dir, "summaries", "test")
    test_summary_writer = tf.summary.FileWriter(test_summary_dir, tf.get_default_graph())

    # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, model_type)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    if model_type != 'cnn' and reuse_cnn:
        # saver for reusing the CNN weights
        reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='VGG_NET_CNN')
        original_saver = tf.train.Saver(reuse_vars)  # Pass the variables as a list

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    print("Starting training...")
    total_start_time = time.time()
    best_validation_accu = 0

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        if model_type != 'cnn' and reuse_cnn:
            cnn_model_path = os.path.abspath(os.path.join(os.path.curdir, log_path, ('cnn_' + str(subj_id)), 'checkpoints'))
            cnn_model_path = tf.train.latest_checkpoint(cnn_model_path)
            print('-' * 20)
            print('Load cnn model weight for multi-frame model from {}'.format(cnn_model_path))
            original_saver.restore(sess, cnn_model_path)

        stop_count = 0  # counter for early stopping
        for epoch in range(num_epochs):
            print('-' * 50)
            # Train set
            train_err = train_acc = train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
                inputs, targets = batch
                summary, _, pred, loss, acc = sess.run(
                    [train_summary_op, train_op, prediction, _loss, accuracy],
                    {input_var: inputs, target_var: targets, tf_is_training: True})
                train_acc += acc
                train_err += loss
                train_batches += 1
                train_summary_writer.add_summary(summary, sess.run(global_steps))

            av_train_err = train_err / train_batches
            av_train_acc = train_acc / train_batches

            # Val set
            summary, pred, av_val_err, av_val_acc = sess.run(
                [dev_summary_op, prediction, _loss, accuracy],
                {input_var: X_val, target_var: y_val, tf_is_training: False})
            dev_summary_writer.add_summary(summary, sess.run(global_steps))

            print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
            fmt_str = "Train \tEpoch [{:d}/{:d}]  train_Loss: {:.4f}\ttrain_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_train_err, av_train_acc * 100))
            fmt_str = "Val \tEpoch [{:d}/{:d}]  val_Loss: {:.4f}\tval_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_val_err, av_val_acc * 100))

            # Test set
            summary, pred, av_test_err, av_test_acc = sess.run(
                [test_summary_op, prediction, _loss, accuracy],
                {input_var: X_test, target_var: y_test, tf_is_training: False})
            test_summary_writer.add_summary(summary, sess.run(global_steps))

            fmt_str = "Test \tEpoch [{:d}/{:d}]  test_Loss: {:.4f}\ttest_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_test_err, av_test_acc * 100))

            if av_val_acc > best_validation_accu:
                # early stopping bookkeeping
                stop_count = 0
                early_stopping_epoch = epoch
                best_validation_accu = av_val_acc
                test_acc_val = av_test_acc
                saver.save(sess, checkpoint_prefix, global_step=sess.run(global_steps))
            else:
                stop_count += 1
                if stop_count >= 10:  # stop training if val_acc does not improve for over 10 epochs
                    break

        # final pass over the training set with the last weights
        train_batches = train_acc = 0
        for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
            inputs, targets = batch
            acc = sess.run(accuracy, {input_var: inputs, target_var: targets, tf_is_training: False})
            train_acc += acc
            train_batches += 1

        last_train_acc = train_acc / train_batches
        last_val_acc = av_val_acc
        last_test_acc = av_test_acc

        print('-' * 50)
        print('Time in total:', time.time() - total_start_time)
        print("Best validation accuracy:\t\t{:.2f} %".format(best_validation_accu * 100))
        print("Test accuracy when got the best validation accuracy:\t\t{:.2f} %".format(test_acc_val * 100))
        print('-' * 50)
        print("Last train accuracy:\t\t{:.2f} %".format(last_train_acc * 100))
        print("Last validation accuracy:\t\t{:.2f} %".format(last_val_acc * 100))
        print("Last test accuracy:\t\t\t\t{:.2f} %".format(last_test_acc * 100))
        print('Early Stopping at epoch: {}'.format(early_stopping_epoch + 1))

    train_summary_writer.close()
    dev_summary_writer.close()
    test_summary_writer.close()
    return [last_train_acc, best_validation_accu, test_acc_val, last_val_acc, last_test_acc]
def run(X_train, y_train, X_val, y_val, X_test, y_test):
    import pickle
    import cProfile
    kron_params = [{'param_density': p} for p in np.linspace(0.0, 0.0, 1, endpoint=False)]
    num_epochs = 5
    batch_size = 100
    hidden_units = [100 ** 2]
    trains, accs = list(zip(*([generate_train_acc(widths=hidden_units, type="old_kron", params=kron_param)
                               for kron_param in kron_params])))
    names = ["old_kron({})".format(p.values()) for p in kron_params]

    results = {}
    for train, acc, name in zip(trains, accs, names):
        res = {}
        res["train_fun"] = train
        res["accuracy_fun"] = acc
        res["train_err"] = []
        res["train_acc"] = []
        res["epoch_times"] = []
        res["val_acc"] = []
        results[name] = res

    # Just profile if you need
    pr = cProfile.Profile()
    pr.enable()
    for epoch in range(num_epochs):
        for (res_name, res) in results.items():
            train_err = 0
            train_acc = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size):
                inputs, targets = batch
                train_err_batch, train_acc_batch = res["train_fun"](inputs, targets)
                train_err += train_err_batch
                train_acc += train_acc_batch
                train_batches += 1

            # And a full pass over the validation data:
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size):
                inputs, targets = batch
                val_acc += res["accuracy_fun"](inputs, targets)
                val_batches += 1

            # Then we print the results for this epoch:
            print("for {}".format(res_name))
            print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
            res["train_err"].append(train_err / train_batches)
            res["train_acc"].append(train_acc / train_batches * 100)
            res["val_acc"].append(val_acc / val_batches * 100)

    # Just profile if you need
    pr.disable()
    pr.print_stats(sort='cumtime')

    for res in results.values():
        res.pop('train_fun')
        res.pop('accuracy_fun')
    with open("comparative_history.dict", 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
def train(num_epochs, batch_size, learning_rate, tensorboard_vis):
    X_train, X_val, X_test, Y_train, Y_val, Y_test = load_dataset()
    # X_train, Y_train = np.random.random(size=(1000, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(1000, 1)).astype(np.float32)
    # X_test, Y_test = np.random.random(size=(200, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(200, 1)).astype(np.float32)
    # X_val, Y_val = np.random.random(size=(100, 256, 256, 3)).astype(np.float32), np.random.randint(2, size=(100, 1)).astype(np.float32)

    print("number of training examples = " + str(X_train.shape[0]))
    print("number of test examples = " + str(X_test.shape[0]))
    print("X_train shape: " + str(X_train.shape))
    print("Y_train shape: " + str(Y_train.shape))
    print("X_test shape: " + str(X_test.shape))
    print("Y_test shape: " + str(Y_test.shape))

    num_examples = X_train.shape[0]
    input_shape = (None, ) + tuple(X_train.shape[1:])
    timestamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())

    tf.reset_default_graph()
    image_data = tf.placeholder(dtype=tf.float32, shape=input_shape, name='image_data')
    targets = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='targets')
    keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')

    with tf.variable_scope('zero_pad') as scope:
        zero_pad = tf.pad(image_data, [[0, 0], [3, 3], [3, 3], [0, 0]], name=scope.name)

    with tf.variable_scope('conv1') as scope:
        kernel = tf.get_variable('kernel', shape=[7, 7, 3, 32], initializer=tf.random_uniform_initializer(), dtype=tf.float32)
        conv = tf.nn.conv2d(zero_pad, filter=kernel, strides=[1, 1, 1, 1], padding='SAME')
        bn = tf.layers.batch_normalization(conv)
        relu = tf.nn.relu(bn)
        dropout = tf.nn.dropout(relu, keep_prob=keep_prob)
        conv1 = tf.nn.max_pool(dropout, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=scope.name)

    with tf.variable_scope('logits') as scope:
        dim = np.prod(conv1.get_shape().as_list()[1:])
        flatten = tf.reshape(conv1, shape=[-1, dim])
        weights = tf.get_variable('weights', shape=[dim, 1], initializer=tf.random_uniform_initializer(), dtype=tf.float32)
        bias = tf.get_variable('bias', shape=[1], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        dense = tf.add(tf.matmul(flatten, weights), bias)
        logits = tf.nn.sigmoid(dense, name=scope.name)

    loss = tf.losses.sigmoid_cross_entropy(targets, logits=logits)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(logits, targets), dtype=tf.float32))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_step = optimizer.minimize(loss)

    if tensorboard_vis:
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        summaries = tf.summary.merge_all()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.trainable_variables())

    n_steps = num_examples // batch_size
    if not num_examples % batch_size == 0:
        n_steps += 1

    if tensorboard_vis:
        train_writer = tf.summary.FileWriter('logs/train', sess.graph)
        val_writer = tf.summary.FileWriter('logs/val', sess.graph)

    for epoch in range(num_epochs):
        # Training
        train_losses, train_accuracies, n_iter = [], [], 0
        for image_batch, label_batch in tqdm(iterate_minibatches(X_train, Y_train, batchsize=batch_size, shuffle=True),
                                             total=n_steps, desc='Epoch {}/{}'.format(epoch, num_epochs)):
            if tensorboard_vis:
                _, train_loss, train_acc, summary = sess.run(
                    [train_step, loss, accuracy, summaries],
                    feed_dict={image_data: image_batch, targets: label_batch, keep_prob: 0.5})
            else:
                _, train_loss, train_acc = sess.run(
                    [train_step, loss, accuracy],
                    feed_dict={image_data: image_batch, targets: label_batch, keep_prob: 0.5})
            if tensorboard_vis and n_iter == 0:
                train_writer.add_summary(summary, n_iter)
                train_writer.flush()
            train_losses.append(train_loss)
            train_accuracies.append(train_acc)
            n_iter += 1
        avg_train_loss = np.mean(train_losses)
        avg_train_acc = np.mean(train_accuracies)

        # Validation
        val_losses, val_accuracies, n_iter = [], [], 0
        for image_batch, label_batch in iterate_minibatches(X_val, Y_val, batchsize=batch_size, shuffle=True):
            if tensorboard_vis:
                val_loss, val_acc, summary = sess.run(
                    [loss, accuracy, summaries],
                    feed_dict={image_data: image_batch, targets: label_batch, keep_prob: 1.0})
            else:
                val_loss, val_acc = sess.run(
                    [loss, accuracy],
                    feed_dict={image_data: image_batch, targets: label_batch, keep_prob: 1.0})
            if tensorboard_vis and n_iter == 0:
                val_writer.add_summary(summary, n_iter)
                val_writer.flush()
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            n_iter += 1
        avg_val_loss = np.mean(val_losses)
        avg_val_acc = np.mean(val_accuracies)

        print('Epoch {}/{}: train loss: {:.4f} train acc: {:.4f} val loss: {:.4f} val acc: {:.4f}'.format(
            epoch, num_epochs, avg_train_loss, avg_train_acc, avg_val_loss, avg_val_acc))

        # save model checkpoint
        saver.save(sess, 'models/{}/model.ckpt'.format(timestamp), global_step=epoch)

    # Testing
    test_losses, test_accuracies, n_iter = [], [], 0
    for image_batch, label_batch in iterate_minibatches(X_test, Y_test, batchsize=batch_size, shuffle=True):
        test_loss, test_acc = sess.run(
            [loss, accuracy],
            feed_dict={image_data: image_batch, targets: label_batch, keep_prob: 1.0})
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)
        n_iter += 1
    avg_test_loss = np.mean(test_losses)
    avg_test_acc = np.mean(test_accuracies)

    print('Test Loss: {:.4f} Test Accuracy: {:.4f}'.format(avg_test_loss, avg_test_acc))
    sess.close()
# With our vocabulary, we still need a method that converts a whole sentence to a sequence of IDs.
# And, to speed up training, we would like to get a so-called mini-batch at a time: multiple such sequences together.
# So our function takes a corpus iterator and a vocabulary, and returns a mini-batch of shape [Batch, Time],
# where the first dimension indexes the sentences in the batch, and the second the time steps in each sentence.

# In[19]:

from utils import iterate_minibatches, prepare_data

# Let's try it out!

# In[20]:

src_reader = smart_reader(train_e_path)
trg_reader = smart_reader(train_f_path)
bitext = bitext_reader(src_reader, trg_reader)

for batch_id, batch in enumerate(iterate_minibatches(bitext, batch_size=4)):
    print("This is the batch of data that we will train on, as tokens:")
    pprint(batch)
    print()

    x, y = prepare_data(batch, vocabulary_e, vocabulary_f)

    print("These are our inputs (i.e. words replaced by IDs):")
    print(x)
    print()

    print("These are the outputs (the foreign sentences):")
    print(y)
    print()
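# For reference, here is a minimal sketch of what a mini-batch helper with this interface could look like.
# This is an illustrative assumption, not the actual `utils.iterate_minibatches` imported above: it simply
# buffers `batch_size` sentence pairs from the corpus iterator and yields them as a list, leaving the
# conversion to padded ID matrices to `prepare_data`.

def iterate_minibatches_sketch(corpus, batch_size=4):
    """Yield lists of `batch_size` examples taken from `corpus`; the last batch may be smaller."""
    batch = []
    for example in corpus:
        batch.append(example)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

# Usage would mirror the loop above, e.g.:
# for batch in iterate_minibatches_sketch(bitext, batch_size=4):
#     pprint(batch)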
        target_var, wordEmbeddings)
    """
    epsilon = 1.0e-7
    print("Starting training...")
    best_val_acc = 0
    best_val_pearson = 0
    for epoch in range(args.epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X1_train, X1_mask_train, X2_train, X2_mask_train,
                                         Y_labels_train, Y_scores_train, Y_scores_pred_train,
                                         args.minibatch, shuffle=True):
            inputs1, inputs1_mask, inputs2, inputs2_mask, labels, scores, scores_pred = batch
            if args.task == "sts":
                scores_pred = np.clip(scores_pred, epsilon, 1.0 - epsilon)
                train_err += train_fn(inputs1, inputs1_mask, inputs2, inputs2_mask, scores_pred)
                # train_err += train_fn(inputs1, inputs2, scores_pred)
            elif args.task == "ent":
                # labels = np.clip(labels, epsilon, 1.0 - epsilon)
                train_err += train_fn(inputs1, inputs1_mask, inputs2, inputs2_mask, labels)
def train(X_train, y_train, X_test, y_test, architecture,
          LABEL_1, LABEL_2,  # labels of the y.
          num_epochs=100, batchsize=5,
          dict_of_paths={'output': '1.txt', 'picture': '1.png', 'report': 'report.txt'},
          report='''trained next architecture, used some optimization method with learning rate...'''):
    """
    Iterate minibatches on train subset and validate results on test subset.

    Parameters
    ----------
    X_train : numpy array
        X train subset.
    y_train : numpy array
        Y train subset.
    X_test : numpy array
        X test subset.
    y_test : numpy array
        Y test subset.
    LABEL_1 : {'AD', 'LMCI', 'EMCI', 'Normal'}
        String label for target == 0.
    LABEL_2 : {'AD', 'LMCI', 'EMCI', 'Normal'}
        String label for target == 1.
    dict_of_paths : dictionary
        Names of files to store results.
    report : string
        Some comments which will be saved into the report after training ends.
    num_epochs : integer
        Number of epochs for all of the experiments. Default is 100.
    batchsize : integer
        Batchsize for network training. Default is 5.

    Returns
    -------
    tr_losses : numpy.array
        Array with loss values on train.
    val_losses : numpy.array
        Array with loss values on test.
    val_accs : numpy.array
        Array with accuracy values on test.
    rocs : numpy.array
        Array with roc auc values on test.
    """
    eps = []
    tr_losses = []
    val_losses = []
    val_accs = []
    rocs = []
    FILE_PATH = dict_of_paths['output']
    PICTURE_PATH = dict_of_paths['picture']
    REPORT_PATH = dict_of_paths['report']

    # here we write outputs on each step (val and train losses, accuracy, auc)
    with open(FILE_PATH, 'w') as f:
        f.write('\n----------\n\n' + str(datetime.datetime.now())[:19])
        f.write('\n' + LABEL_1 + '-' + LABEL_2 + '\n')
        f.close()

    # starting training
    print("Starting training...")
    sys.stdout.flush()
    den = X_train.shape[0] / batchsize
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches_train(X_train, y_train, batchsize, shuffle=True):
            inputs, targets = batch
            history = architecture.fit(inputs, targets)
            train_err = train_err + np.mean(history.history['loss'])
            train_batches = train_batches + 1

        val_err = 0
        val_batches = 0
        preds = []
        targ = []
        for batch in iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
            inputs, targets = batch
            err = architecture.evaluate(inputs, targets)
            val_err = val_err + np.mean(err)
            val_batches = val_batches + 1
            out = architecture.predict(inputs)
            [preds.append(i) for i in out]
            [targ.append(i) for i in targets]

        preds_tst = np.array(preds).argmax(axis=1)

        ## output
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        sys.stdout.flush()
        print("  training loss:\t\t{:.7f}".format(train_err / train_batches))
        sys.stdout.flush()
        print("  validation loss:\t\t{:.7f}".format(val_err / val_batches))
        sys.stdout.flush()
        print('  validation accuracy:\t\t{:.7f}'.format(accuracy_score(np.array(targ), preds_tst)))
        sys.stdout.flush()
        print('Confusion matrix for test:')
        sys.stdout.flush()
        print(confusion_matrix(np.array(targ), np.array(preds).argmax(axis=1)))
        sys.stdout.flush()
        rcs = roc_auc_score(np.array(targ), np.array(preds))
        sys.stderr.write('Pairwise ROC_AUCs: ' + str(rcs))
        print('')

        with open(FILE_PATH, 'a') as f:
            f.write("\nEpoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
            f.write("\n  training loss:\t\t{:.7f}".format(train_err / train_batches))
            f.write("\n  validation loss:\t\t{:.7f}".format(val_err / val_batches))
            f.write('\n  validation accuracy:\t\t{:.7f}'.format(
                accuracy_score(np.array(targ), np.array(preds).argmax(axis=1))))
            f.write('\n Pairwise ROC_AUCs:' + str(rcs) + '\n')
            f.close()
        ## output

        ## saving results
        eps.append(epoch + 1)
        tr_losses.append(train_err / train_batches)
        val_losses.append(val_err / val_batches)
        val_accs.append(accuracy_score(np.array(targ), np.array(preds).argmax(axis=1)))
        rocs.append(rcs)

    print('ended!')

    ### and save plots
    plt.figure(figsize=(15, 10))
    plt.subplot(2, 2, 1)
    plt.title('Loss ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylim((0, 3))
    plt.ylabel('Loss')
    plt.plot(eps, tr_losses, label='train')
    plt.plot(eps, val_losses, label='validation')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 2)
    plt.title('Accuracy ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.plot(eps, val_accs, label='validation accuracy')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 3)
    plt.title('AUC ' + LABEL_1 + ' vs ' + LABEL_2)
    plt.xlabel('Epoch')
    plt.ylabel('AUC')
    plt.plot(eps, np.array(rocs), label='validation auc')
    plt.legend(loc=0)
    #
    plt.subplot(2, 2, 4)
    plt.title('architecture')
    plt.axis('off')
    plt.text(0, -0.1, architecture, fontsize=7)
    plt.savefig(PICTURE_PATH)
    ###########

    # write that training has ended
    with open(FILE_PATH, 'a') as f:
        f.write('\nended at ' + str(datetime.datetime.now())[:19] + '\n \n')
        f.close()

    # write report
    with open(REPORT_PATH, 'a') as f:
        f.write('\n' + LABEL_1 + ' vs ' + LABEL_2 + '\n' + report)
        # f.write(architecture)
        f.write('final results are:')
        f.write('\n tr_loss: ' + str(tr_losses[-1]) + '\n val_loss: ' +
                str(val_losses[-1]) + '\n val_acc; ' + str(val_accs[-1]) +
                '\n val_roc_auc: ' + str(rocs[-1]))
        f.write('\nresults have been saved in files:\n')
        f.write(FILE_PATH + '\n')
        f.write(PICTURE_PATH + '\n')
        f.write('\n ___________________ \n\n\n')
        f.close()

    return tr_losses, val_losses, val_accs, rocs
# LOOP EPOCHS
print('\tTrain model')
for epoch in range(MAX_EPOCHS):
    print('\tEpoch: ' + str(epoch + 1) + ' of ' + str(MAX_EPOCHS))
    # down sample
    inputs_train, targets_train = u_s.down_sample(inputs_=inputs_train_ep, targets_=targets_train_ep, no_class=NUM_CLASSES)
    max_mini_batch = np.ceil(1 + len(inputs_train) / BATCH_SIZE)
    _iter = 1
    for x_batch, y_batch in utils.iterate_minibatches(batchsize=BATCH_SIZE, inputs=inputs_train,
                                                      targets=targets_train, shuffle=True):
        _, _loss, _acc = sess.run(fetches=[train_model, cross_entropy, accuracy],
                                  feed_dict={x_pl: x_batch, y_pl: y_batch})
        print("\t\tminibatch: %d ~ %d\tLOSS: %f\tACCs: %f" % (_iter, max_mini_batch, _loss, _acc), end='\r')
        _iter += 1
profile = False
test_count = 0
first = True
Q_conv_count = 0
frozen_epoch = 0
for epoch in range(num_epochs):
    start_time = time.time()
    loss = 0
    err = 0
    Q = 0
    for batch in utils.iterate_minibatches(inputs=train_data_resized, targets=train_labels, batchsize=batch_size):
        train_in, train_target = batch
        # train_in = train_in[:, np.newaxis, :, np.newaxis]
        tmp_sum, loss_, err_, Q_ = model.train(train_in, train_target, profile)
        if profile:
            fetched_timeline = timeline.Timeline(model.run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('grcnn-timeline_01_step_0.json', 'w') as f:
                f.write(chrome_trace)
        if first:
            model.writer.add_summary(tmp_sum, epoch)
            profile = False
        loss += loss_
        err += err_
        Q += Q_
        # print loss
def training(trainers, train_data, testers=[], test_data=[], num_epochs=20, logger=None):
    """
    TODO: Explain the whole function.

    Params
    ------
        trainers:
        train_data:
        testers: (default=[])
        test_data: (default=[])
        num_epochs: (default=20)
        logger: (default=None)

    Return
    ------
        stats: dict with stats
    """
    if logger is None:
        logger = empty_logger()
    logger.info("Starting training...")

    final_stats = {}
    final_stats.update({trainer.name + ' training loss': [] for trainer in trainers})
    final_stats.update({trainer.name + ' valid loss': [] for trainer in trainers})
    final_stats.update({tester.name + ' valid loss': [] for tester in testers})

    final_stats.update({(trainer.name + str(i) + ' training acc' if trainer.train.n_returned_outputs > 2
                         else trainer.name + ' training acc'): []
                        for trainer in trainers for i in range(trainer.train.n_returned_outputs - 1)})
    final_stats.update({(trainer.name + str(i) + ' valid acc' if trainer.train.n_returned_outputs > 2
                         else trainer.name + ' valid acc'): []
                        for trainer in trainers for i in range(trainer.train.n_returned_outputs - 1)})
    final_stats.update({(tester.name + str(i) + ' valid acc' if tester.train.n_returned_outputs > 2
                         else tester.name + ' valid acc'): []
                        for tester in testers for i in range(tester.train.n_returned_outputs - 1)})
    # final_stats.update({trainer.name + ' valid acc': [] for trainer in trainers})
    # final_stats.update({tester.name + ' valid acc': [] for tester in testers})

    for epoch in range(num_epochs):
        # Prepare the statistics
        start_time = time.time()
        stats = {key: [] for key in final_stats.keys()}

        # Do some training preparations:
        for data, trainer in zip(train_data + test_data, trainers + testers):
            trainer.preprocess(data, trainer, epoch)

        # Training (forward and backward propagation),
        # done with the iterative functions
        batches = tuple(iterate_minibatches(data['X_train'], data['y_train'], data['batchsize'], shuffle=True)
                        for data in train_data)
        for minibatches in zip(*batches):
            for batch, trainer in zip(minibatches, trainers):
                # X, y = batch
                res = trainer.train(*batch)
                loss, acc = res.pop(0), res  # The first element should be the loss
                stats[trainer.name + ' training loss'].append(loss)
                # In the normal case, res holds only one accuracy
                if len(acc) == 1:
                    stats[trainer.name + ' training acc'].append(acc * 100)
                else:
                    # Otherwise we have multiple accuracies
                    for i, a in enumerate(acc):
                        stats[trainer.name + str(i) + ' training acc'].append(a * 100)

        # Validation (forward propagation),
        # done with the iterative functions
        batches = tuple(iterate_minibatches(data['X_val'], data['y_val'], data['batchsize'])
                        for data in train_data + test_data)
        for minibatches in zip(*batches):
            for batch, valider in zip(minibatches, trainers + testers):
                # X, y = batch
                res = valider.valid(*batch)
                loss, acc = res.pop(0), res  # The first element should be the loss
                stats[valider.name + ' valid loss'].append(loss)
                # In the normal case, res holds only one accuracy
                if len(acc) == 1:
                    stats[valider.name + ' valid acc'].append(acc * 100)
                else:
                    # Otherwise we have multiple accuracies
                    for i, a in enumerate(acc):
                        stats[valider.name + str(i) + ' valid acc'].append(a * 100)

        logger.info("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        for stat_name, stat_value in sorted(stats.items()):
            if stat_value:
                mean_value = np.mean(stat_value)
                logger.info('  {:30} : {:.6f}'.format(stat_name, mean_value))
                final_stats[stat_name].append(mean_value)

    return final_stats
def train(data_folderpath='data/edges2shoes', image_size=256, ndf=64, ngf=64,
          lr_d=2e-4, lr_g=2e-4, n_iterations=int(1e6), batch_size=64,
          iters_per_checkpoint=100, n_checkpoint_samples=16,
          reconstruction_weight=100, out_dir='gan'):
    logger = SummaryWriter(out_dir)
    logger.add_scalar('d_lr', lr_d, 0)
    logger.add_scalar('g_lr', lr_g, 0)

    data_iterator = iterate_minibatches(data_folderpath + "/train/*.jpg", batch_size, image_size)
    val_data_iterator = iterate_minibatches(data_folderpath + "/val/*.jpg", n_checkpoint_samples, image_size)
    img_ab_fixed, _ = next(val_data_iterator)
    img_a_fixed, img_b_fixed = img_ab_fixed[:, 0], img_ab_fixed[:, 1]

    img_a_shape = img_a_fixed.shape[1:]
    img_b_shape = img_b_fixed.shape[1:]
    patch = int(img_a_shape[0] / 2**4)  # n_layers
    disc_patch = (patch, patch, 1)
    print("img a shape ", img_a_shape)
    print("img b shape ", img_b_shape)
    print("disc_patch ", disc_patch)

    # plot real text for reference
    log_images(img_a_fixed, 'real_a', '0', logger)
    log_images(img_b_fixed, 'real_b', '0', logger)

    # build models
    D = build_discriminator(img_a_shape, img_b_shape, ndf, activation='sigmoid')
    G = build_generator(img_a_shape, ngf)

    # build model outputs
    img_a_input = Input(shape=img_a_shape)
    img_b_input = Input(shape=img_b_shape)

    fake_samples = G(img_a_input)
    D_real = D([img_a_input, img_b_input])
    D_fake = D([img_a_input, fake_samples])

    loss_reconstruction = partial(mean_absolute_error, real_samples=img_b_input, fake_samples=fake_samples)
    loss_reconstruction.__name__ = 'loss_reconstruction'

    # define D graph and optimizer
    G.trainable = False
    D.trainable = True
    D_model = Model(inputs=[img_a_input, img_b_input], outputs=[D_real, D_fake])
    D_model.compile(optimizer=Adam(lr_d, beta_1=0.5, beta_2=0.999), loss='binary_crossentropy')

    # define D(G(z)) graph and optimizer
    G.trainable = True
    D.trainable = False
    G_model = Model(inputs=[img_a_input, img_b_input], outputs=[D_fake, fake_samples])
    G_model.compile(Adam(lr=lr_g, beta_1=0.5, beta_2=0.999),
                    loss=['binary_crossentropy', loss_reconstruction],
                    loss_weights=[1, reconstruction_weight])

    ones = np.ones((batch_size, ) + disc_patch, dtype=np.float32)
    zeros = np.zeros((batch_size, ) + disc_patch, dtype=np.float32)
    dummy = zeros

    for i in range(n_iterations):
        D.trainable = True
        G.trainable = False
        image_ab_batch, _ = next(data_iterator)
        loss_d = D_model.train_on_batch([image_ab_batch[:, 0], image_ab_batch[:, 1]], [ones, zeros])

        D.trainable = False
        G.trainable = True
        image_ab_batch, _ = next(data_iterator)
        loss_g = G_model.train_on_batch([image_ab_batch[:, 0], image_ab_batch[:, 1]], [ones, dummy])

        print("iter", i)
        if (i % iters_per_checkpoint) == 0:
            G.trainable = False
            fake_image = G.predict(img_a_fixed)
            log_images(fake_image, 'val_fake', i, logger)
            save_model(G, out_dir)

        log_losses(loss_d, loss_g, i, logger)
eps = []
best_val_acc = 0
print("Start training\n")

for epoch in range(num_epochs):
    # Calculate epoch time
    start_time = time.time()

    # Full pass over the training set
    train_err = 0
    train_batches = 0
    confusion_train = ConfusionMatrix(n_class)

    # Generate minibatches and train on each one of them
    for batch in iterate_minibatches(X_tr, y_tr, mask_tr, batch_size, shuffle=True):
        inputs, targets, in_masks = batch
        tr_err, predict = train_fn(inputs, targets, in_masks)
        train_err += tr_err
        train_batches += 1
        preds = np.argmax(predict, axis=-1)
        confusion_train.batch_add(targets, preds)

    train_loss = train_err / train_batches
    train_accuracy = confusion_train.accuracy()
    cf_train = confusion_train.ret_mat()

    # Full pass over the validation set
    val_err = 0
    val_batches = 0
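# The fragment above accumulates per-batch predictions in a ConfusionMatrix with
# batch_add()/accuracy()/ret_mat(). A minimal NumPy sketch of such a class,
# assuming integer class labels in [0, n_class); the real class may track more:
import numpy as np

class ConfusionMatrix(object):
    # Sketch (assumption): rows are true classes, columns are predicted classes.
    def __init__(self, n_class):
        self.mat = np.zeros((n_class, n_class), dtype=np.int64)

    def batch_add(self, targets, preds):
        # Count one entry per (true, predicted) pair in the batch.
        for t, p in zip(np.asarray(targets).ravel(), np.asarray(preds).ravel()):
            self.mat[t, p] += 1

    def accuracy(self):
        # Fraction of examples on the diagonal (correct predictions).
        return np.trace(self.mat) / float(self.mat.sum())

    def ret_mat(self):
        return self.mat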
def main(reps, pretrained_w_path, do_module1, init_seed=0, load_t=0, num_epochs=200, batchsize=96, fine_tune=0, patience=500, lr_init = 1e-3, optim='adagrad', toy=0, num_classes=23): res_root = '/home/hoa/Desktop/projects/resources' X_path=osp.join(res_root, 'datasets/msrcv2/Xaug_b01c.npy') Y_path=osp.join(res_root, 'datasets/msrcv2/Y.npy') MEAN_IMG_PATH=osp.join(res_root, 'models/ilsvrc_2012_mean.npy') snapshot=50 # save model after every `snapshot` epochs drop_p=0.5 # drop out prob. lambda2=0.0005/2 # l2-regularizer constant # step=patience/4 # decay learning after every `step` epochs lr_patience=60 # for learning rate schedule, if optim=='momentum' if toy: # unit testing num_epochs=10 data_multi=3 reps = 2 #drop_p=0 #lambda2=0 # Create name tag for the experiment if fine_tune: full_or_tune = 'tune' # description tag for storing associated files else: full_or_tune = 'full' time_stamp=time.strftime("%y%m%d%H%M%S", time.localtime()) snapshot_root = '../snapshot_models/' snapshot_name = str(num_classes)+'alex'+time_stamp+full_or_tune # LOADING DATA print 'LOADING DATA ...' X = np.load(X_path) Y = np.load(Y_path) if X.shape[1]!=3: X = b01c_to_bc01(X) N = len(Y) print 'Raw X,Y shape', X.shape, Y.shape if len(X) != len(Y): print 'Inconsistent number of input images and labels. X is possibly augmented.' MEAN_IMG = np.load(MEAN_IMG_PATH) MEAN_IMG_227 = skimage.transform.resize( np.swapaxes(np.swapaxes(MEAN_IMG,0,1),1,2), (227,227), mode='nearest', preserve_range=True) MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227,1,2),0,1).reshape((1,3,227,227)) all_metrics = [] # store metrics in each run time_profiles = { 'train_module1': [], 'train_module1_eff': [], 'train_module2': [], 'test': [] } # record training and testing time # PREPARE THEANO EXPRESSION FOR BOTH MODULES print 'COMPILING THEANO EXPRESSION ...' 
input_var = T.tensor4('inputs') target_var = T.imatrix('targets') network = build_model(num_classes=num_classes, input_var=input_var) # Create a loss expression for training prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.binary_crossentropy(prediction, target_var) weights = lasagne.layers.get_all_params(network, regularizable=True) l2reg = theano.shared(floatX(lambda2))*T.sum([T.sum(w ** 2) for w in weights]) loss = loss.mean() + l2reg lr = theano.shared(np.array(lr_init, dtype=theano.config.floatX)) lr_decay = np.array(1./3, dtype=theano.config.floatX) # Create update expressions for training params = lasagne.layers.get_all_params(network, trainable=True) # last-layer case is actually very simple: # `params` above is a list of all (W,b)-pairs # Therefore last layer's (W,b) is params[-2:] if fine_tune == 7: # tuning params from fc7 to fc8 params = params[-2:] # elif fine_tune == 6: # tuning params from fc6 to fc8 # params = params[-4:] # TODO adjust for per-layer training with local_lr if optim=='momentum': updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9) elif optim=='rmsprop': updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr, rho=0.9, epsilon=1e-06) elif optim=='adam': updates = lasagne.updates.adam( loss, params, learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-08) elif optim=='adagrad': updates = lasagne.updates.adagrad(loss, params, learning_rate=lr, epsilon=1e-06) # Create a loss expression for validation/testing test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() + l2reg # zero-one loss with threshold t = 0.5 for reference # zero_one_loss = T.abs_((test_prediction > theano.shared(floatX(0.5))) - target_var).sum(axis=1) #zero_one_loss /= target_var.shape[1].astype(theano.config.floatX) #zero_one_loss = zero_one_loss.mean() # Compile a function performing a backward pass (training step) on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: bwd_fn = theano.function([input_var, target_var], loss, updates=updates,) # Compile a second function performing a forward pass, # returns validation loss, 0/1 Error, score i.e. Xout: fwd_fn = theano.function([input_var, target_var], test_loss) # Create a theano function for computing score score = lasagne.layers.get_output(network, deterministic=True) score_fn = theano.function([input_var], score) def compute_score(X, Y, batchsize=batchsize, shuffle=False): out = np.zeros(Y.shape) batch_id = 0 for batch in iterate_minibatches(X, Y, batchsize, shuffle=False): inputs, _ = batch # Flip random half of the batch flip_idx = np.random.choice(len(inputs),size=len(inputs)/2,replace=False) if len(flip_idx)>1: inputs[flip_idx] = inputs[flip_idx,:,:,::-1] # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead if len(inputs)==batchsize: out[batch_id*batchsize : (batch_id+1)*batchsize] = score_fn(inputs) batch_id += 1 else: out[batch_id*batchsize : ] = score_fn(inputs) return out try: # MAIN LOOP FOR EACH RUN for seed in np.arange(reps)+init_seed: # reset learning rate lr.set_value(lr_init) print '\nRUN', seed, '...' 
# Split train/val/test set indicies = np.arange(len(Y)) Y_train_val, Y_test, idx_train_val, idx_test = train_test_split( Y, indicies, random_state=seed, train_size=float(2)/3) Y_train, Y_val, idx_train, idx_val = train_test_split( Y_train_val, idx_train_val, random_state=seed) print "Train/val/test set size:",len(idx_train),len(idx_val),len(idx_test) idx_aug_train = data_aug(idx_train, mode='aug', isMat='idx', N=N) Xaug_train = X[idx_aug_train] Yaug_train = data_aug(Y_train, mode='aug', isMat='Y', N=N) idx_aug_val = data_aug(idx_val, mode='aug', isMat='idx', N=N) Xaug_val = X[idx_aug_val] Yaug_val = data_aug(Y_val, mode='aug', isMat='Y', N=N) # Module 2 training set is composed of module 1 training and validation set idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx', N=N) Xaug_train_val = X[idx_aug_train_val] Yaug_train_val = data_aug(Y_train_val, mode='aug', isMat='Y', N=N) # Test set X_test = X[idx_test] # Y_test is already returned in the first train_test_split print "Augmented train/val/test set size:",len(Xaug_train),len(Yaug_val), len(X_test) print "Augmented (X,Y) dtype:", Xaug_train.dtype, Yaug_val.dtype print "Processed Mean image:",MEAN_IMG.dtype,MEAN_IMG.shape if toy: # try to overfit a tiny subset of the data Xaug_train = Xaug_train[:batchsize*data_multi + batchsize/2] Yaug_train = Yaug_train[:batchsize*data_multi + batchsize/2] Xaug_val = Xaug_val[:batchsize + batchsize/2] Yaug_val = Yaug_val[:batchsize + batchsize/2] # Init by pre-trained weights, if any if len(pretrained_w_path)>0: layer_list = lasagne.layers.get_all_layers(network) # 22 layers if pretrained_w_path.endswith('pkl'): # load reference_net # use case: weights initialized from pre-trained reference nets f = open(pretrained_w_path, 'r') w_list = pickle.load(f) # list of 11 (W,b)-pairs f.close() lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2]) # exclude (W,b) of fc8 # BIG NOTE: don't be confused, it's pure coincident that layer_list # and w_list have the same index here. The last element of layer_list are # [.., fc6, drop6, fc7, drop7, fc8], while w_list are # [..., W, b, W, b, W, b] which, eg w_list[-4] and w_list[-3] correspond to # params that are associated with fc7 i.e. 
params that connect drop6 to fc7 elif pretrained_w_path.endswith('npz'): # load self-trained net # use case: continue training from a snapshot model with np.load(pretrained_w_path) as f: # NOTE: only load snapshot of the same `seed` # w_list = [f['arr_%d' % i] for i in range(len(f.files))] w_list = [f.items()['arr_%d' % i] for i in range(len(f.files))] # load from bkviz, one-time use lasagne.layers.set_all_param_values(network, w_list) elif pretrained_w_path.endswith('/'): # init from 1 of the 30 snapshots from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) print snapshot_name with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) # START MODULE 1 module1_time = 0 if do_module1: print 'MODULE 1' training_history={} training_history['iter_training_loss'] = [] training_history['iter_validation_loss'] = [] training_history['training_loss'] = [] training_history['validation_loss'] = [] training_history['learning_rate'] = [] # http://deeplearning.net/tutorial/gettingstarted.html#early-stopping # early-stopping parameters n_train_batches = Xaug_train.shape[0] / batchsize if Xaug_train.shape[0] % batchsize != 0: n_train_batches += 1 patience = patience # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found lr_patience_increase = 1.01 improvement_threshold = 0.995 # a relative improvement of this much is # considered significant; a significant test # MIGHT be better validation_frequency = min(n_train_batches, patience/2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_params = None epoch_validation_loss = 0 # indicates that valid_loss has not been computed yet best_validation_loss = np.inf best_iter = -1 lr_iter = -1 test_score = 0. start_time = time.time() done_looping = False epoch = 0 # Finally, launch the training loop. 
print("Starting training...") # We iterate over epochs: print("\nEpoch\tTrain Loss\tValid Loss\tBest-ValLoss-and-Iter\tTime\tL.Rate") sys.setrecursionlimit(10000) try: # Early-stopping implementation while (not done_looping) and (epoch<num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True): inputs, targets = batch # Horizontal flip half of the images bs = inputs.shape[0] indices = np.random.choice(bs, bs / 2, replace=False) inputs[indices] = inputs[indices, :, :, ::-1] # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead train_err_batch = bwd_fn(inputs, targets) train_err += train_err_batch train_batches += 1 iter_now = epoch*n_train_batches + train_batches training_history['iter_training_loss'].append(train_err_batch) training_history['iter_validation_loss'].append(epoch_validation_loss) if (iter_now+1) % validation_frequency == 0: # a full pass over the validation data: val_err = 0 #zero_one_err = 0 val_batches = 0 for batch in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False): inputs, targets = batch # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead val_err_batch = fwd_fn(inputs, targets) val_err += val_err_batch val_batches += 1 epoch_validation_loss = val_err / val_batches if epoch_validation_loss < best_validation_loss: if epoch_validation_loss < best_validation_loss*improvement_threshold: patience = max(patience, iter_now * patience_increase) # lr_patience *= lr_patience_increase best_params = lasagne.layers.get_all_param_values(network) best_validation_loss = epoch_validation_loss best_iter = iter_now lr_iter = best_iter else: # decay learning rate if optim=='momentum' if optim=='momentum' and (iter_now - lr_iter) > lr_patience: lr.set_value(lr.get_value() * lr_decay) lr_iter = iter_now if patience <= iter_now: done_looping = True break # Record training history training_history['training_loss'].append(train_err / train_batches) training_history['validation_loss'].append(epoch_validation_loss) training_history['learning_rate'].append(lr.get_value()) epoch_time = time.time() - start_time module1_time += epoch_time # Then we print the results for this epoch: print("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{}\t{:.3f}\t{}".format( epoch+1, training_history['training_loss'][-1], training_history['validation_loss'][-1], best_validation_loss, best_iter+1, epoch_time, training_history['learning_rate'][-1] )) if (epoch+1)%snapshot==0: # TODO try to save weights at best_iter snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1) try: # use case: terminate experiment before reaching `reps` np.savez(snapshot_path_string+'.npz', *best_params) np.savez(snapshot_path_string+'_history.npz', training_history) plot_loss(training_history, snapshot_path_string+'_loss.png') # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], # snapshot_path_string+'_conv1weights_') except KeyboardInterrupt, TypeError: print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1) pass epoch += 1 except KeyboardInterrupt, MemoryError: # Sadly this can only catch KeyboardInterrupt pass print 'Training finished or KeyboardInterrupt (Training is never finished, only abandoned)' module1_time_eff = module1_time / 
iter_now * best_iter print('Total and Effective training time are {:.0f} and {:.0f}').format( module1_time, module1_time_eff) time_profiles['train_module1'].append(module1_time) time_profiles['train_module1_eff'].append(module1_time_eff) # Save model after num_epochs or KeyboardInterrupt if (epoch+1)%snapshot!=0: # to avoid duplicate save snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1) if not toy: try: # use case: terminate experiment before reaching `reps` print 'Saving model...' np.savez(snapshot_path_string+'.npz', *best_params) np.savez(snapshot_path_string+'_history.npz', training_history) plot_loss(training_history, snapshot_path_string+'_loss.png') # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], # snapshot_path_string+'_conv1weights_') except KeyboardInterrupt, TypeError: print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1) pass # And load them again later on like this: #with np.load('../snapshot_models/23alex16042023213910.npz') as f: # param_values = [f['arr_%d' % i] for i in range(len(f.files))] # or # training_history = f['arr_0'].items() # lasagne.layers.set_all_param_values(network, param_values) # END OF MODULE 1 # START MODULE 2 print '\nMODULE 2' if not do_module1: if pretrained_w_path.endswith('pkl'): snapshot_name = str(num_classes)+'alexOTS' # short for "off-the-shelf init" elif pretrained_w_path.endswith('npz'): # Resume from a SINGLE snapshot # extract name pattern, e.g. '23alex16042023213910full10' # from string '../snapshot_models/23alex16042023213910full10_100.npz' import re regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+" match = re.search(regex, pretrained_w_path) snapshot_name = match.group(0) elif pretrained_w_path.endswith('/'): # RESUMED FROM TRAINED MODULE 1 (ONE-TIME USE) from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) print snapshot_name with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) else: # MAIN BRANCH - assume do_module1 is True AND have run `snapshot` epochs if (epoch+1)>snapshot: with np.load(snapshot_path_string+'.npz') as f: # reload the best params for module 1 w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) score_train = compute_score(Xaug_train_val, Yaug_train_val) start_time = time.time() if load_t: # Server failed at the wrong time. We only have t backed-up if pretrained_w_path.endswith('/'): from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) t_train = np.load(osp.join('t','{0}.npy'.format(snapshot_name))) else: # MAIN BRANCH thresholds = Threshold(score_train, Yaug_train_val) thresholds.find_t_for() # determine t_train for each score_train. 
It will take a while t_train = np.asarray(thresholds.t) print 't_train is in ', t_train.min(), '..', t_train.max() # `thresholds` holds t_train vector in .t attribute print('t_train produced in {:.3f}s').format(time.time()-start_time) np.save('t/'+snapshot_name+str(seed)+'.npy', t_train) # Predictive model for t regr = linear_model.RidgeCV(cv=5) # Ridge() is LinearClassifier() with L2-reg regr.fit(score_train, t_train) time_profiles['train_module2'].append(time.time()-start_time) # END OF MODULE 2 # TESTING PHASE start_time = time.time() score_test = compute_score(X_test, Y_test) t_test = regr.predict(score_test) print 'original t_test is in ', min(t_test), '..', max(t_test) t_test[t_test>1] = max(t_test[t_test<1]) t_test[t_test<0] = min(t_test[t_test>0]) # ! Keep t_test in [0,1] print 'corrected t_test is in ', min(t_test), '..', max(t_test) # Predict label metrics = predict_label(score_test, Y_test, t_test, seed, num_classes, verbose=1) time_profiles['test'].append(time.time()-start_time) all_metrics.append(metrics)
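# Module 2 above fits a ridge regressor mapping each score vector to a
# per-example threshold t; at test time labels are obtained by comparing each
# row of scores against its predicted threshold. A minimal sketch of that final
# thresholding step (the real predict_label also computes evaluation metrics,
# which are omitted here):
import numpy as np

def scores_to_labels(score_test, t_test):
    # One threshold per test example, broadcast across the num_classes score
    # columns; returns a binary multi-label matrix.
    return (score_test > t_test[:, np.newaxis]).astype(np.int32)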
def run(X_train, y_train, X_val, y_val, X_test, y_test): import pickle import cProfile kron_params = [{ 'param_density': p } for p in np.linspace(0.0, 0.0, 1, endpoint=False)] num_epochs = 5 batch_size = 100 hidden_units = [100**2] trains, accs = list( zip(*([ generate_train_acc( widths=hidden_units, type="old_kron", params=kron_param) for kron_param in kron_params ]))) names = ["old_kron({})".format(p.values()) for p in kron_params] results = {} for train, acc, name in zip(trains, accs, names): res = {} res["train_fun"] = train res["accuracy_fun"] = acc res["train_err"] = [] res["train_acc"] = [] res["epoch_times"] = [] res["val_acc"] = [] results[name] = res # Just profile if you need pr = cProfile.Profile() pr.enable() for epoch in range(num_epochs): for (res_name, res) in results.items(): train_err = 0 train_acc = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, batch_size): inputs, targets = batch train_err_batch, train_acc_batch = res["train_fun"](inputs, targets) train_err += train_err_batch train_acc += train_acc_batch train_batches += 1 # And a full pass over the validation data: val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, batch_size): inputs, targets = batch val_acc += res["accuracy_fun"](inputs, targets) val_batches += 1 # Then we print the results for this epoch: print("for {}".format(res_name)) print("Epoch {} of {} took {:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss (in-iteration):\t\t{:.6f}".format( train_err / train_batches)) print(" train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) res["train_err"].append(train_err / train_batches) res["train_acc"].append(train_acc / train_batches * 100) res["val_acc"].append(val_acc / val_batches * 100) # Just profile if you need pr.disable() pr.print_stats(sort='cumtime') for res in results.values(): res.pop('train_fun') res.pop('accuracy_fun') with open("comparative_history.dict", 'wb') as pickle_file: pickle.dump(results, pickle_file)
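# run() above pickles the per-architecture training curves into
# 'comparative_history.dict'. A small sketch of how those curves could be
# reloaded and compared afterwards; the matplotlib plotting is an assumption,
# only the pickle format follows from the code above:
import pickle
import matplotlib.pyplot as plt

with open("comparative_history.dict", "rb") as f:
    results = pickle.load(f)

for name, res in results.items():
    plt.plot(res["val_acc"], label=name)
plt.xlabel("epoch")
plt.ylabel("validation accuracy (%)")
plt.legend()
plt.show()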
def train(n_channels=3, resolution=32, z_dim=128, n_labels=0, lr=1e-3, e_drift=1e-3, wgp_target=750, initial_resolution=4, total_kimg=25000, training_kimg=500, transition_kimg=500, iters_per_checkpoint=500, n_checkpoint_images=16, glob_str='cifar10', out_dir='cifar10'): # instantiate logger logger = SummaryWriter(out_dir) # load data batch_size = MINIBATCH_OVERWRITES[0] train_iterator = iterate_minibatches(glob_str, batch_size, resolution) # build models G = Generator(n_channels, resolution, z_dim, n_labels) D = Discriminator(n_channels, resolution, n_labels) G_train, D_train = GAN(G, D, z_dim, n_labels, resolution, n_channels) D_opt = Adam(lr=lr, beta_1=0.0, beta_2=0.99, epsilon=1e-8) G_opt = Adam(lr=lr, beta_1=0.0, beta_2=0.99, epsilon=1e-8) # define loss functions D_loss = [loss_mean, loss_gradient_penalty, 'mse'] G_loss = [loss_wasserstein] # compile graphs used during training G.compile(G_opt, loss=loss_wasserstein) D.trainable = False G_train.compile(G_opt, loss=G_loss) D.trainable = True D_train.compile(D_opt, loss=D_loss, loss_weights=[1, GP_WEIGHT, e_drift]) # for computing the loss ones = np.ones((batch_size, 1), dtype=np.float32) zeros = ones * 0.0 # fix a z vector for training evaluation z_fixed = np.random.normal(0, 1, size=(n_checkpoint_images, z_dim)) # vars resolution_log2 = int(np.log2(resolution)) starting_block = resolution_log2 starting_block -= np.floor(np.log2(initial_resolution)) cur_block = starting_block cur_nimg = 0 # compute duration of each phase and use proxy to update minibatch size phase_kdur = training_kimg + transition_kimg phase_idx_prev = 0 # offset variable for transitioning between blocks offset = 0 i = 0 while cur_nimg < total_kimg * 1000: # block processing kimg = cur_nimg / 1000.0 phase_idx = int(np.floor((kimg + transition_kimg) / phase_kdur)) phase_idx = max(phase_idx, 0.0) phase_kimg = phase_idx * phase_kdur # update batch size and ones vector if we switched phases if phase_idx_prev < phase_idx: batch_size = MINIBATCH_OVERWRITES[phase_idx] train_iterator = iterate_minibatches(glob_str, batch_size) ones = np.ones((batch_size, 1), dtype=np.float32) zeros = ones * 0.0 phase_idx_prev = phase_idx # possibly gradually update current level of detail if transition_kimg > 0 and phase_idx > 0: offset = (kimg + transition_kimg - phase_kimg) / transition_kimg offset = min(offset, 1.0) offset = offset + phase_idx - 1 cur_block = max(starting_block - offset, 0.0) # update level of detail K.set_value(G_train.cur_block, np.float32(cur_block)) K.set_value(D_train.cur_block, np.float32(cur_block)) # train D for j in range(N_CRITIC_ITERS): z = np.random.normal(0, 1, size=(batch_size, z_dim)) real_batch = next(train_iterator) fake_batch = G.predict_on_batch([z]) interpolated_batch = get_interpolated_images( real_batch, fake_batch) losses_d = D_train.train_on_batch( [real_batch, fake_batch, interpolated_batch], [ones, ones * wgp_target, zeros]) cur_nimg += batch_size # train G z = np.random.normal(0, 1, size=(batch_size, z_dim)) loss_g = G_train.train_on_batch(z, -1 * ones) logger.add_scalar("cur_block", cur_block, i) logger.add_scalar("learning_rate", lr, i) logger.add_scalar("batch_size", z.shape[0], i) print("iter", i, "cur_block", cur_block, "lr", lr, "kimg", kimg, "losses_d", losses_d, "loss_g", loss_g) if (i % iters_per_checkpoint) == 0: G.trainable = False fake_images = G.predict(z_fixed) # log fake images log_images(fake_images, 'fake', i, logger, fake_images.shape[1], fake_images.shape[2], int(np.sqrt(n_checkpoint_images))) # plot real images for reference 
            log_images(real_batch[:n_checkpoint_images], 'real', i, logger,
                       real_batch.shape[1], real_batch.shape[2],
                       int(np.sqrt(n_checkpoint_images)))

            # save the model to eventually resume training or do inference
            save_model(G, out_dir + "/model.json", out_dir + "/model.h5")

        log_losses(losses_d, loss_g, i, logger)
        i += 1
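# The progressive-GAN loop above feeds get_interpolated_images(real_batch,
# fake_batch) to the gradient-penalty output of the critic. A minimal sketch of
# that helper, assuming the standard WGAN-GP construction with one uniform
# mixing coefficient per sample (the actual helper is not shown here):
import numpy as np

def get_interpolated_images(real_batch, fake_batch):
    # epsilon ~ U(0, 1), drawn once per image and broadcast over the remaining
    # spatial and channel dimensions of the 4-D batch.
    eps = np.random.uniform(size=(real_batch.shape[0], 1, 1, 1))
    return eps * real_batch + (1.0 - eps) * fake_batch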
def train(self): """Trains a model.""" steps = 0 # ========= # evaluate on development set val_aer, val_acc, val_loss = self.model.evaluate( self.dev_corpus, self.dev_wa, batch_size=self.batch_size) # print Epoch loss print("Epoch {} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}".format( 0, val_aer, val_acc, val_loss)) #======== for epoch_id in range(1, self.num_epochs + 1): # shuffle data set every epoch print("Shuffling training data") random.shuffle(self.corpus) loss = 0.0 accuracy_correct = 0 accuracy_total = 0 epoch_steps = 0 for batch_id, batch in enumerate( iterate_minibatches(self.corpus, batch_size=self.batch_size), 1): # Dynamic learning rate, cf. Bottou (2012), Stochastic gradient descent tricks. lr_t = self.lr * (1 + self.lr * self.lr_decay * steps)**-1 x, y = prepare_data(batch, self.model.x_vocabulary, self.model.y_vocabulary) # If you want to see the data that goes into the model during training # you may uncomment this. #if batch_id % 1000 == 0: # print(" ".join([str(t) for t in x[0]])) # print(" ".join([str(t) for t in y[0]])) # print(" ".join([self.model.x_vocabulary.get_token(t) for t in x[0]])) # print(" ".join([self.model.y_vocabulary.get_token(t) for t in y[0]])) # input to the TF graph feed_dict = { self.lr_ph: lr_t, self.model.x: x, self.model.y: y, self.model.is_training: True } # things we want TF to return to us from the computation fetches = { "optimizer": self.optimizer, "loss": self.model.loss, "acc_correct": self.model.accuracy_correct, "acc_total": self.model.accuracy_total, "pa_x": self.model.pa_x, "py_xa": self.model.py_xa, "py_x": self.model.py_x, "KL": self.model.KL # "a" : self.model.a, # "b" : self.model.b, # "alpha" : self.model.alpha, # "beta" : self.model.beta } res = self.session.run(fetches, feed_dict=feed_dict) loss += res["loss"] accuracy_correct += res["acc_correct"] accuracy_total += res["acc_total"] batch_accuracy = res["acc_correct"] / float(res["acc_total"]) steps += 1 epoch_steps += 1 if batch_id % 100 == 0: # print(res["KL"]) # print(res["a"]) # print(res["b"]) # print(res["alpha"]) # print(res["beta"]) print("Iter {:5d} loss {:6f} accuracy {:1.2f} lr {:1.6f}". format(batch_id, res["loss"], batch_accuracy, lr_t)) # evaluate on development set val_aer, val_acc, val_loss = self.model.evaluate( self.dev_corpus, self.dev_wa, batch_size=self.batch_size, training=True) # print Epoch loss print( "Train=true: Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}" .format(epoch_id, loss / float(epoch_steps), accuracy_correct / float(accuracy_total), val_aer, val_acc, val_loss)) val_aer, val_acc, val_loss = self.model.evaluate( self.dev_corpus, self.dev_wa, batch_size=self.batch_size) # print Epoch loss print( "Train=False: Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f} val_loss {:6f}" .format(epoch_id, loss / float(epoch_steps), accuracy_correct / float(accuracy_total), val_aer, val_acc, val_loss)) # save parameters save_path = self.model.save(self.session, path="model.ckpt") print("Model saved in file: %s" % save_path)
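# This trainer (and the variant below) anneals the learning rate with the
# Bottou (2012) schedule lr_t = lr / (1 + lr * lr_decay * steps), where `steps`
# is the global batch counter. A tiny worked example of how the rate decays,
# using illustrative values for lr and lr_decay (not taken from the snippets):
lr, lr_decay = 0.1, 0.01
for steps in (0, 100, 1000, 10000):
    lr_t = lr * (1 + lr * lr_decay * steps) ** -1
    print(steps, lr_t)
# prints roughly: 0 -> 0.1, 100 -> 0.0909, 1000 -> 0.05, 10000 -> 0.0091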
def train(self): """Trains a model.""" steps = 0 for epoch_id in range(1, self.num_epochs + 1): # shuffle data set every epoch print("Shuffling training data") random.shuffle(self.corpus) loss = 0.0 accuracy_correct = 0 accuracy_total = 0 epoch_steps = 0 for batch_id, batch in enumerate(iterate_minibatches( self.corpus, batch_size=self.batch_size), 1): # Dynamic learning rate, cf. Bottou (2012), Stochastic gradient descent tricks. lr_t = self.lr * (1 + self.lr * self.lr_decay * steps)**-1 x, y = prepare_data(batch, self.model.x_vocabulary, self.model.y_vocabulary) # If you want to see the data that goes into the model during training # you may uncomment this. #if batch_id % 1000 == 0: # print(" ".join([str(t) for t in x[0]])) # print(" ".join([str(t) for t in y[0]])) # print(" ".join([self.model.x_vocabulary.get_token(t) for t in x[0]])) # print(" ".join([self.model.y_vocabulary.get_token(t) for t in y[0]])) # input to the TF graph feed_dict = { self.lr_ph : lr_t, self.model.x : x, self.model.y : y } # things we want TF to return to us from the computation fetches = { "optimizer" : self.optimizer, "loss" : self.model.loss, "acc_correct" : self.model.accuracy_correct, "acc_total" : self.model.accuracy_total, "pa_x" : self.model.pa_x, "py_xa" : self.model.py_xa, "py_x" : self.model.py_x } res = self.session.run(fetches, feed_dict=feed_dict) loss += res["loss"] accuracy_correct += res["acc_correct"] accuracy_total += res["acc_total"] batch_accuracy = res["acc_correct"] / float(res["acc_total"]) steps += 1 epoch_steps += 1 if batch_id % 100 == 0: print("Iter {:5d} loss {:6f} accuracy {:1.2f} lr {:1.6f}".format( batch_id, res["loss"], batch_accuracy, lr_t)) if batch_id % 5000 == 0: #break after 5000, to keep computation time down. break # evaluate on development set val_aer, val_acc = self.model.evaluate(self.dev_corpus, self.dev_wa) self.Aer.append(val_aer) self.Loss.append(loss) # print Epoch loss print("Epoch {} loss {:6f} accuracy {:1.2f} val_aer {:1.2f} val_acc {:1.2f}".format( epoch_id, loss / float(epoch_steps), accuracy_correct / float(accuracy_total), val_aer, val_acc)) # save parameters save_path = self.model.save(self.session, path="D:/Roderick/Documents/Master/5 NLP2/project three/project_neuralibm/model.ckpt") print("Model saved in file: %s" % save_path) plt.figure() plt.title('AER') plt.plot(self.Aer) print("AER", self.Aer) plt.figure() plt.title('Loss') plt.plot(self.Loss) print("Loss", self.Loss)
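# The AER and Loss figures created at the end of the snippet above are only
# displayed when an interactive matplotlib backend is available. A small,
# hypothetical helper for also writing such curves to disk when running
# headless (an assumption about usage, not something the original code does):
import matplotlib.pyplot as plt

def save_curve(values, title, path):
    # Persist a training curve as an image file, e.g.
    # save_curve(self.Aer, 'AER', 'aer.png').
    plt.figure()
    plt.title(title)
    plt.plot(values)
    plt.savefig(path)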