def train(net, epochs=10, batch_size=32, lr=3e-3, clip=1, print_every=256):
    # Optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    # Loss
    criterion = nn.CrossEntropyLoss()

    counter = 0
    net.train()
    for e in range(epochs):
        # Init hidden state
        h = net.init_hidden(batch_size)
        for x, y in get_batches(x_int, y_int, batch_size):
            counter += 1
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            h = tuple([each.data for each in h])
            net.zero_grad()
            output, h = net(inputs, h)
            loss = criterion(output, targets.view(-1))
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
            if counter % print_every == 0:
                print(f'Epoch: {e+1}/{epochs}', f'Step: {counter}...')
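# The loop above assumes a get_batches helper plus globals x_int / y_int holding the
# integer-encoded text. Below is a minimal sketch of such a generator, not the original
# implementation: the seq_len argument and the (batch_size, seq_len) layout are
# assumptions made for illustration only.
import numpy as np

def get_batches(x_int, y_int, batch_size, seq_len=100):
    """Yield (x, y) mini-batches of shape (batch_size, seq_len) from flat integer arrays."""
    n_per_batch = batch_size * seq_len
    n_batches = len(x_int) // n_per_batch
    # Trim to a whole number of batches and arrange as batch_size parallel streams
    x = np.asarray(x_int[:n_batches * n_per_batch]).reshape(batch_size, -1)
    y = np.asarray(y_int[:n_batches * n_per_batch]).reshape(batch_size, -1)
    for start in range(0, x.shape[1], seq_len):
        yield x[:, start:start + seq_len], y[:, start:start + seq_len]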
def do_epoch(self, data, optimizer):
    self.model.train()
    avg_loss = 0
    epoch_start_time = time.time()
    batches = data.get_batches(self.batch_size)
    for batch in batches:
        self.model.zero_grad()
        X, Y1, Y2, lengths = data.tensorize_batch(batch, self.device, self.model.width)
        loss = self.model(X, Y1, Y2, lengths, is_training=True)
        avg_loss += loss.item() / len(batches)
        loss.backward()
        optimizer.step()
    acc, vm, _, _ = self.evaluate(data)
    epoch_time = time.time() - epoch_start_time
    return avg_loss, acc, vm, epoch_time
def evaluate(self, data):
    self.model.eval()
    batches = data.get_batches(self.batch_size)
    zseqs = [[False for w in sent] for sent in data.sents]
    clustering = [{} for z in range(self.model.num_labels)]
    with torch.no_grad():
        for batch in batches:
            X, Y1, Y2, lengths = data.tensorize_batch(batch, self.device, self.model.width)
            future_probs, future_indices = self.model(X, Y1, Y2, lengths, is_training=False)
            for k, (i, j) in enumerate(batch):
                z = future_indices[k].item()
                zseqs[i][j] = z
                clustering[z][data.sents[i][j]] = True
    acc = compute_many2one_acc(data.golds, zseqs)
    vm = compute_v_measure(data.golds, zseqs)
    return acc, vm, zseqs, clustering
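# The evaluation above relies on compute_many2one_acc and compute_v_measure. Below is a
# minimal sketch of many-to-one accuracy, assuming data.golds and zseqs are parallel
# lists of sentences with one label per token; it is illustrative, not the original helper.
from collections import Counter

def compute_many2one_acc(golds, zseqs):
    """Map each induced cluster to its most frequent gold label, then score accuracy."""
    votes = {}
    for gold_sent, z_sent in zip(golds, zseqs):
        for gold_label, z in zip(gold_sent, z_sent):
            votes.setdefault(z, Counter())[gold_label] += 1
    correct = sum(counter.most_common(1)[0][1] for counter in votes.values())
    total = sum(sum(counter.values()) for counter in votes.values())
    return correct / total if total > 0 else 0.0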
def train(self, epoch, batch_size, continued=False):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    batches = data.get_batches(batch_size=batch_size, word2idx=self.word2idx, idx2vec=self.idx2vec)
    with tf.Session(config=config) as sess:
        if continued:
            ckpt_path = tf.train.latest_checkpoint('./checkpoint')
            print('restore %s' % ckpt_path)
            self.saver.restore(sess, ckpt_path)
        else:
            init = tf.global_variables_initializer()
            sess.run(init)
        for i in range(epoch):
            print('epoch %d...' % (i + 1))
            train_size = int(0.9 * len(batches))
            for batch_num in range(train_size):
                sess.run(self.opt, feed_dict={
                    self.Ques: batches[batch_num]['quests'],
                    self.Ques_seq_len: batches[batch_num]['quest_len'],
                    self.Pos_Ans: batches[batch_num]['pos_ans'],
                    self.Pos_Ans_len: batches[batch_num]['pos_ans_len'],
                    self.Neg_Ans: batches[batch_num]['neg_ans'],
                    self.Neg_Ans_len: batches[batch_num]['neg_ans_len'],
                    self.keep_prob: 0.5
                })
            train_error = self.evaluate(sess, batches[0:train_size], batch_size)
            print('train error %f' % train_error)
            test_error = self.evaluate(sess, batches[train_size:], batch_size)
            print('test error %f' % test_error)
            self.saver.save(sess, './checkpoint/new.ckpt', global_step=i)
def train_with_graph(graph, qp_pairs, dev_qp_pairs):
    '''
    Train a network from a specific graph.
    '''
    global sess
    with tf.Graph().as_default():
        train_model = GAG(cfg, embed, graph)
        train_model.build_net(is_training=True)
        tf.get_variable_scope().reuse_variables()
        dev_model = GAG(cfg, embed, graph)
        dev_model.build_net(is_training=False)
        with tf.Session() as sess:
            logger.debug('init variables')
            init = tf.global_variables_initializer()
            sess.run(init)
            # writer = tf.summary.FileWriter('%s/graph/' % execution_path, sess.graph)
            logger.debug('assign to graph')

            saver = tf.train.Saver()
            train_loss = None
            bestacc = 0
            patience = 5
            patience_increase = 2
            improvement_threshold = 0.995

            for epoch in range(max_epoch):
                logger.debug('begin to train')
                train_batches = data.get_batches(qp_pairs, cfg.batch_size)
                train_loss = run_epoch(train_batches, train_model, True)
                logger.debug('epoch ' + str(epoch) + ' loss: ' + str(train_loss))
                dev_batches = list(data.get_batches(dev_qp_pairs, cfg.batch_size))
                _, position1, position2, ids, contexts = run_epoch(dev_batches, dev_model, False)

                answers = generate_predict_json(position1, position2, ids, contexts)
                if save_path is not None:
                    with open(os.path.join(save_path, 'epoch%d.prediction' % epoch), 'w') as file:
                        json.dump(answers, file)
                else:
                    answers = json.dumps(answers)
                    answers = json.loads(answers)
                iter = epoch + 1
                acc = evaluate.evaluate_with_predictions(args.dev_file, answers)

                logger.debug('Send intermediate acc: %s', str(acc))
                nni.report_intermediate_result(acc)
                logger.debug('Send intermediate result done.')

                if acc > bestacc:
                    if acc * improvement_threshold > bestacc:
                        patience = max(patience, iter * patience_increase)
                    bestacc = acc
                    if save_path is not None:
                        saver.save(sess, os.path.join(save_path, 'epoch%d.model' % epoch))
                        with open(os.path.join(save_path, 'epoch%d.score' % epoch), 'wb') as file:
                            pickle.dump((position1, position2, ids, contexts), file)
                logger.debug('epoch %d acc %g bestacc %g' % (epoch, acc, bestacc))
                if patience <= iter:
                    break
            logger.debug('save done.')
    return train_loss, bestacc
def main(
        # Dataset Configuration
        path_train='../train.json',             # Path to load training set
        path_val='../val.json',                 # Path to load validation set
        path_test='../test.json',               # Path to load testing set
        path_mat_train='../VGG19_train.npy',    # Path of image features of training set
        path_mat_val='../VGG19_val.npy',        # Path of image features of validation set
        path_mat_test='../VGG19_test.npy',      # Path of image features of testing set
        max_samples_train=0,                    # Max number of samples in training set
        max_samples_val=0,                      # Max number of samples in validation set
        max_samples_test=0,                     # Max number of samples in testing set
        # Model Configuration
        n_dim_img=4096,                         # Dimension of image feature
        n_dim_txt=250,                          # Dimension of word embedding
        n_dim_enc=1000,                         # Number of hidden units in encoder
        n_dim_dec=1000,                         # Number of hidden units in decoder
        batch_size=64,                          # Batch size
        beam_size=10,                           # Number of candidate(s) in beam search
        # Save & Load
        path_load='model.npz',                  # Path to load a previously trained model - Required
        path_out_train='beam_train.json',       # Path to save predicted sentences of training set
        path_out_val='beam_val.json',           # Path to save predicted sentences of validation set
        path_out_test='beam_test.json',         # Path to save predicted sentences of testing set
):
    '''
    Main function
    '''
    print('Loading data...')
    n_dim_vocab = 0     # Vocabulary size
    samples_train, mat_train, n_dim_vocab = load_data(path_train, path_mat_train, n_dim_vocab, max_samples_train)
    samples_val, mat_val, n_dim_vocab = load_data(path_val, path_mat_val, n_dim_vocab, max_samples_val)
    samples_test, mat_test, n_dim_vocab = load_data(path_test, path_mat_test, n_dim_vocab, max_samples_test)
    max_len = max([len(sample[1]) for sample in samples_train])    # Max length of sentences
    print('\ttraining:   %6d samples' % len(samples_train))
    print('\tvalidation: %6d samples' % len(samples_val))
    print('\ttesting:    %6d samples' % len(samples_test))

    params = OrderedDict(numpy.load(path_load))
    del params['costs']
    t_params = OrderedDict()
    init_t_params(params, t_params)

    print('Building word sampler...')
    f_enc = build_enc(t_params, n_dim_img, n_dim_enc, n_dim_dec)
    f_dec = build_dec(t_params, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, beam_size)

    print('Predicting...')
    preds_train, err_train = predict(f_enc, f_dec, samples_train, get_batches(len(samples_train), batch_size),
                                     mat_train, beam_size, max_len, 'PREDICT TRA')
    with open(path_out_train, 'w') as file_out:
        json.dump(preds_train, file_out)
    preds_val, err_val = predict(f_enc, f_dec, samples_val, get_batches(len(samples_val), batch_size),
                                 mat_val, beam_size, max_len, 'PREDICT VAL')
    with open(path_out_val, 'w') as file_out:
        json.dump(preds_val, file_out)
    preds_test, err_test = predict(f_enc, f_dec, samples_test, get_batches(len(samples_test), batch_size),
                                   mat_test, beam_size, max_len, 'PREDICT TES')
    with open(path_out_test, 'w') as file_out:
        json.dump(preds_test, file_out)
    print('ERR TRA: %f  ERR VAL: %f  ERR TES: %f' % (err_train, err_val, err_test))
    print('Done.')
def main(
        # Dataset Configuration
        path_train='../train.json',             # Path to load training set
        path_val='../val.json',                 # Path to load validation set
        path_mat_train='../VGG19_train.npy',    # Path of image features of training set
        path_mat_val='../VGG19_val.npy',        # Path of image features of validation set
        max_samples_train=0,                    # Max number of samples in training set
        max_samples_val=0,                      # Max number of samples in validation set
        # Model Configuration
        n_dim_img=4096,                         # Dimension of image feature
        n_dim_txt=250,                          # Dimension of word embedding
        n_dim_enc=1000,                         # Number of hidden units in encoder
        n_dim_dec=1000,                         # Number of hidden units in decoder
        batch_size_train=64,                    # Batch size in training
        batch_size_test=64,                     # Batch size in validation
        optimizer=adadelta,                     # [sgd|adam|adadelta|rmsprop], sgd not recommended
        lrate=0.0002,                           # Learning rate for optimizer
        max_epochs=1000,                        # Maximum number of epochs to run
        patience=10,                            # Number of epochs to wait before early stop if no progress
        # Frequency
        ratio_val=1.,                           # Validation frequency - validate after training on this ratio of the data
        ratio_save=1.,                          # Save frequency - save the best parameters after training on this ratio of the data
        # Save & Load
        path_load=None,                         # Path to load a previously trained model
        path_save='model',                      # Path to save the models
):
    '''
    Main function
    '''
    print('Loading data...')
    n_dim_vocab = 0     # Vocabulary size
    samples_train, mat_train, n_dim_vocab = load_data(path_train, path_mat_train, n_dim_vocab, max_samples_train)
    samples_val, mat_val, n_dim_vocab = load_data(path_val, path_mat_val, n_dim_vocab, max_samples_val)
    print('\ttraining:   %6d samples' % len(samples_train))
    print('\tvalidation: %6d samples' % len(samples_val))

    t_params = OrderedDict()
    best_params = None
    costs = []
    if path_load:
        best_params = OrderedDict(numpy.load(path_load))
        costs.extend(best_params['costs'])
        del best_params['costs']
        init_t_params(best_params, t_params)

    print('Building model...')
    f_cost, f_update = build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer)

    print('Training...')
    time_start = time.time()
    batches_val = get_batches(len(samples_val), batch_size_test)
    n_epochs = 0
    n_samples = 0
    n_bad_costs = 0
    n_stops = 0
    next_val = ratio_val * len(samples_train)
    next_save = max(ratio_save * len(samples_train), next_val)
    while n_epochs < max_epochs:
        n_epochs += 1
        batches_train = get_batches(len(samples_train), batch_size_train, True)
        pgb_train = ProgressBar(len(batches_train), 20, 'EPOCH %4d  ' % n_epochs)
        costs_train = []
        for batch_train in batches_train:
            n_samples += len(batch_train)
            get_cost(f_cost, samples_train, batch_train, mat_train, costs_train, pgb_train, f_update, lrate)
            if n_samples >= next_val:
                next_val += ratio_val * len(samples_train)
                pgb_train.pause()
                pgb_val = ProgressBar(len(batches_val), 20, 'VALIDATION  ')
                costs_val = []
                for batch_val in batches_val:
                    get_cost(f_cost, samples_val, batch_val, mat_val, costs_val, pgb_val)
                costs.append(numpy.mean(costs_val))
                if best_params is None or costs[-1] <= numpy.min(costs):
                    best_params = params_unzip(t_params)
                    n_bad_costs = 0
                else:
                    n_bad_costs += 1
                    if n_bad_costs > patience:
                        n_stops += 1
                        print('WARNING: early stop for %d time(s)!' % n_stops)
                        params_zip(best_params, t_params)
                        n_bad_costs = 0
            if path_save and n_samples >= next_save:
                next_save = max(next_save + ratio_save * len(samples_train), next_val)
                pgb_train.pause()
                print('Saving model...')
                if best_params is not None:
                    params = best_params
                else:
                    params = params_unzip(t_params)
                numpy.savez(path_save, costs=costs, **params)
                numpy.savez('%s_%f' % (path_save, costs_train[-1]), costs=costs, **params_unzip(t_params))

    time_end = time.time()
    print('Training finished')
    print('TIME: %9.3f sec  EPOCHS: %4d  SPEED: %9.3f sec/epoch' %
          (time_end - time_start, n_epochs, (time_end - time_start) / n_epochs))
    if best_params is not None:
        params_zip(best_params, t_params)
    else:
        best_params = params_unzip(t_params)
    print('Saving final model...')
    if path_save:
        numpy.savez(path_save, costs=costs, **best_params)
    print('Done.')
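# The training loop above moves parameters between plain numpy dicts and Theano shared
# variables via params_unzip / params_zip. A minimal sketch of that pair, assuming
# t_params maps parameter names to shared variables (illustration, not the original code):
from collections import OrderedDict

def params_unzip(t_params):
    """Copy the current values of the shared variables into a plain dict."""
    return OrderedDict((name, shared.get_value()) for name, shared in t_params.items())

def params_zip(params, t_params):
    """Push saved numpy values back into the corresponding shared variables."""
    for name, value in params.items():
        t_params[name].set_value(value)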
agent1_loss_history = []
os.makedirs('checkpoints', exist_ok=True)

def print_round_stats(acc, sl, loss):
    print("*******")
    print("Round average accuracy: %.2f" % (acc * 100))
    print("Round average sentence length: %.1f" % sl)
    print("Round average loss: %.1f" % loss)
    print("*******")

for round in range(args.num_rounds):
    print("********** round %d **********" % round)
    batches = get_batches(images_dict, args.data_n_samples, args.num_games_per_round, args.batch_size)
    round_accuracy, round_loss, round_sentence_length = train_round(
        agent1, agent2, batches, optimizer2, args.max_sentence_len, args.vocab_size)
    print_round_stats(round_accuracy, round_sentence_length, round_loss)

    agent1_accuracy_history.append(round_accuracy)
    agent1_message_length_history.append(round_sentence_length / 20)
    agent1_loss_history.append(round_loss)

    round += 1
    print("replacing roles")
    print("********** round %d **********" % round)
    round_accuracy, round_loss, round_sentence_length = train_round(
CODE_PATH = ''
# if not os.path.isdir(CODE_PATH):
#     os.mkdir(CODE_PATH)

DATA_PATH = sys.argv[1]
# batch_size = sys.argv[2]  # 128 # 64
batch_size = 2
latent_dim = 6

# load data
X_train, y_train, X_val, y_val, X_test, y_test = data.read_data(DATA_PATH)
X_train, y_train = data.get_data(X_train, y_train)
X_val, y_val = data.get_data(X_val, y_val)
X_test, y_test = data.get_data(X_test, y_test)
X_train_batch, y_train_batch = data.get_batches(X_train, y_train, batch_size)
X_val_batch, y_val_batch = data.get_batches(X_val, y_val, batch_size)
X_test_batch, y_test_batch = data.get_batches(X_test, y_test, batch_size)

images = tf.placeholder(tf.float32, [None, 128, 128, 1], name="images")
y_true = tf.placeholder(tf.int32, [None, 128, 128, 1], name="y_true")

unet_seg = model.Res_Unet(images)
# unet_seg = model.Unet(images)
prior_mvn = model.Prior_net(images, latent_dim, 'prior_dist')
# z_prior = prior_mvn.sample()
# seg_prior = model.Fcomb(unet_seg, z_prior, 'prior')
posterior_mvn = model.Posterior_net(images, y_true, latent_dim, 'post_dist')
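# data.get_batches above is assumed to split the arrays into aligned lists of fixed-size
# batches; a minimal sketch under that assumption (illustration, not the original module):
def get_batches(X, y, batch_size):
    """Return parallel lists of batches; the last batch may be smaller than batch_size."""
    X_batches = [X[i:i + batch_size] for i in range(0, len(X), batch_size)]
    y_batches = [y[i:i + batch_size] for i in range(0, len(y), batch_size)]
    return X_batches, y_batches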