def train_epoch(self, train_set, valid_data, epoch, shuffle=True):
    """Run one training epoch over ``train_set``, logging a train summary per
    batch and a validation summary every 100 batches.

    Args:
        train_set: list of raw examples; re-batched below.
        valid_data: pre-built feed data for the validation summary.
        epoch: 1-based epoch index, used to derive the global step.
        shuffle: shuffle examples before batching.
    """
    if shuffle:
        random.shuffle(train_set)
    # re-batch after shuffling so batch composition differs every epoch
    train_set = batchnize_dataset(train_set, self.cfg.batch_size)
    num_batches = len(train_set)
    prog = Progbar(target=num_batches)
    for i, batch_data in enumerate(train_set):
        feed_dict = self._get_feed_dict(
            batch_data, emb_keep_prob=self.cfg["emb_keep_prob"],
            rnn_keep_prob=self.cfg["rnn_keep_prob"],
            attn_keep_prob=self.cfg["attn_keep_prob"],
            is_train=True, lr=self.cfg["lr"])
        _, train_loss, summary = self.sess.run(
            [self.train_op, self.loss, self.summary], feed_dict=feed_dict)
        # global step counts batches across all epochs
        cur_step = (epoch - 1) * num_batches + (i + 1)
        prog.update(i + 1, [("Global Step", int(cur_step)),
                            ("Train Loss", train_loss)])
        self.train_writer.add_summary(summary, cur_step)
        if i % 100 == 0:
            # no dropout/lr flags -> feed dict is built in eval configuration
            valid_feed_dict = self._get_feed_dict(valid_data)
            valid_summary = self.sess.run(self.summary, feed_dict=valid_feed_dict)
            self.test_writer.add_summary(valid_summary, cur_step)
def validate(self):
    """Run one pass over the validation set, reporting D/G losses and accuracy.

    Each generator item is a batch of (rgb_image, sketch_image) pairs.
    """
    print('\n\nValidating epoch: %d' % self.epoch)
    print('-' * 80)
    total = len(self.dataset_val)
    val_generator = self.dataset_val.generator(self.options.batch_size)
    progbar = Progbar(total, width=25)
    for input_rgb in val_generator:
        # split the (rgb, sketch) pairs into two parallel arrays
        originItems = np.array([pair[0] for pair in input_rgb])
        sketchItems = np.array([pair[1] for pair in input_rgb])
        feed_dic = {
            self.input_rgb: originItems,
            # FIX: use -1 instead of options.batch_size so the final
            # (possibly smaller) batch does not break the reshape
            self.input_gray: sketchItems.reshape(-1, 512, 512, 1)
        }
        self.sess.run([self.dis_loss, self.gen_loss, self.accuracy],
                      feed_dict=feed_dic)
        lossD, lossD_fake, lossD_real, lossG, lossG_l1, lossG_gan, acc, step = \
            self.eval_outputs(feed_dic=feed_dic)
        progbar.add(len(input_rgb), values=[("D loss", lossD),
                                            ("D fake", lossD_fake),
                                            ("D real", lossD_real),
                                            ("G loss", lossG),
                                            ("G L1", lossG_l1),
                                            ("G gan", lossG_gan),
                                            ("accuracy", acc)])
    print('\n')
def run_epoch(self, sess, train, dev, tags, epoch):
    """
    Performs one complete pass over the train set and evaluate on dev

    Args:
        sess: tensorflow session
        train: dataset that yields tuple of sentences, tags
        dev: dataset
        tags: {tag: index} dictionary
        epoch: (int) number of the epoch
    """
    # ceil(len(train) / batch_size) without importing math
    nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    prog = Progbar(target=nbatches)
    for i, (words, labels) in enumerate(minibatches(train, self.config.batch_size)):
        fd, _ = self.get_feed_dict(words, labels, self.config.LR, self.config.dropout)
        _, train_loss, summary = sess.run([self.train_op, self.loss, self.merged],
                                          feed_dict=fd)
        prog.update(i + 1, [("train loss", train_loss)])
        # tensorboard
        if i % 10 == 0:
            self.file_writer.add_summary(summary, epoch * nbatches + i)
    acc, f1 = self.run_evaluate(sess, dev, tags)
    self.logger.info(
        "- dev acc {:04.2f} - f1 {:04.2f}".format(100 * acc, 100 * f1))
    return acc, f1
def validate(val_loader, model, criterion):
    """Run one evaluation pass over ``val_loader``; returns average top-1 accuracy."""
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = Progbar(len(val_loader))
    # switch to evaluate mode
    model.eval()
    # accumulated predictions/labels; only consumed by the np.save lines
    # below, which are currently commented out
    predicted = np.array([], dtype='float32')
    targets = np.array([], dtype='float32')
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda()
            target = target.cuda()
            # compute output
            output = model(images)
            loss = criterion(output, target)
            # measure accuracy and record loss (move to host before numpy)
            predicted = np.append(predicted,
                                  np.argmax(output.cpu().numpy(), axis=1))
            targets = np.append(targets, target.cpu().numpy())
            acc1 = accuracy(output, target, topk=(1, ))
            top1.update(acc1[0], images.size(0))
            suffix = [('loss', loss.item()), ('acc', acc1[0].cpu().numpy())]
            progress.update(i + 1, suffix)
    # np.save(f'opt/predict_{epoch}.npy', predicted)
    # np.save(f'opt/target_{epoch}.npy', targets)
    return top1.avg
def run_evaluate(self, sess, test, tags, target='src'):
    """Evaluate tagging performance on ``test``.

    Args:
        sess: tensorflow session.
        test: dataset yielding (words, labels, target_words) tuples.
        tags: {tag: index} dictionary for chunk extraction.
        target: 'src' or other; selects which decoder predict_batch uses.

    Returns:
        (acc, p, r, f1): token accuracy plus chunk-level precision/recall/F1.
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    nbatces = (len(test) + self.args.batch_size - 1) // self.args.batch_size
    prog = Progbar(target=nbatces)
    for i, (words, labels, target_words) in enumerate(
            minibatches(test, self.args.batch_size)):
        if target == 'src':
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, mode=target, is_training=False)
        else:
            # non-source mode feeds words through the second input slot
            labels_pred, sequence_lengths = self.predict_batch(
                sess, None, words, mode=target, is_training=False)
        for lab, label_pred, length in zip(labels, labels_pred, sequence_lengths):
            # truncate padding to the true sequence length
            lab = lab[:length]
            lab_pred = label_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            # chunk-level (entity-level) comparison
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
        prog.update(i + 1)
    # guards avoid division by zero when nothing was predicted correctly
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, p, r, f1
def predict(self, X, mode='crf', partial_labels=None, use_bar=False):
    """Predict label sequences for one example or a list of examples.

    When ``partial_labels`` is given, positions with a label != -1 are
    asserted to match the prediction (constrained decoding sanity check).
    Returns a single prediction for a single input, else a list.
    """
    use_sentence_markers = self.options['SENTENCE_MARKERS']
    if type(X) != list:
        # single example -> wrap so the loop below is uniform
        X = [X]
    predictions = []
    if use_bar:
        bar = Progbar(len(X))
    else:
        bar = None
    for ix, elem in enumerate(X):
        sentence, feature_vector, sentence_markers = self.get_sentence_feature_vector(elem)
        if use_sentence_markers:
            assert sentence_markers is not None, "Sentence marker for %d is None" % (ix)
        else:
            sentence_markers = None
        if partial_labels is None:
            _, prediction = self.__call__(sentence, feature_vector, mode,
                                          sentence_markers=sentence_markers)
        else:
            _, prediction = self.__call__(sentence, feature_vector, mode,
                                          partial_labels[ix],
                                          sentence_markers=sentence_markers)
            # verify the decoder respected every fixed (non -1) label
            for jx in xrange(len(prediction)):
                if partial_labels[ix][jx] != -1:
                    assert partial_labels[ix][jx] == prediction[jx]
        predictions.append(prediction)
        if bar is not None:
            bar.update(ix + 1)
    if len(predictions) == 1:
        return predictions[0]
    else:
        return predictions
class Generator(object):
    """Re-iterable corpus wrapper for gensim-style multi-epoch training.

    Each ``__iter__`` call yields tokenized lines. Once ``model`` is set
    externally (training started), every full pass increments the epoch
    counter and saves a checkpoint.
    """

    def __init__(self, data, n_epochs, lowercase=True):
        self.data = data              # iterable of raw text lines
        self.epoch_number = 0         # bumped at the start of each training pass
        self.model = None             # set externally once training starts
        self.model_prefix = None      # checkpoint path prefix, set externally
        self.n_epochs = n_epochs
        self.tokenizer = my_tokenize
        self.lowercase = lowercase

    def __iter__(self):
        """Yield tokenized lines; checkpoint the model after a training pass."""
        if self.model is not None:
            # Training started
            self.epoch_number += 1
            print 'STARTING EPOCH : (%d/%d)' % (self.epoch_number, self.n_epochs)
            sys.stdout.flush()
        self.bar = Progbar(len(self.data))
        for idx, line in enumerate(self.data):
            self.bar.update(idx + 1)
            line = line.lower() if self.lowercase else line
            yield self.tokenizer(line)
        if self.model is not None:
            if self.epoch_number != self.n_epochs:
                # intermediate checkpoint, tagged with the epoch number
                SAVE_FILE_NAME = self.model_prefix + '_iter_' + str(
                    self.epoch_number) + '.model'
            else:
                # Last Epoch
                SAVE_FILE_NAME = self.model_prefix + '.model'
            self.model.save(SAVE_FILE_NAME)
def cf(val_loader):
    """Evaluate the module-level ``model`` on ``val_loader`` and return the
    confusion matrix over all validation samples.

    Relies on module-level ``model``, ``criterion``, ``accuracy``,
    ``confusion_matrix`` and ``Progbar``.
    """
    progress = Progbar(len(val_loader))
    model.eval()
    predicted = np.array([], dtype='float32')
    targets = np.array([], dtype='float32')
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda()
            target = target.cuda()
            # compute output
            output = model(images)
            loss = criterion(output, target)
            # FIX: move tensors to host memory before NumPy ops --
            # np.argmax/np.append on CUDA tensors raises "can't convert
            # CUDA tensor to numpy". Mirrors the sibling validate() helper.
            predicted = np.append(predicted,
                                  np.argmax(output.cpu().numpy(), axis=1))
            targets = np.append(targets, target.cpu().numpy())
            acc1 = accuracy(output, target, topk=(1, ))
            suffix = [('loss', loss.item()), ('acc', acc1[0].cpu().numpy())]
            progress.update(i + 1, suffix)
    cfm = confusion_matrix(targets, predicted)
    return cfm
def fit(self, trainloader, validationloader, criterion, optimizer, epochs,
        val_per_batch):
    """Train the network, running a full validation sweep after every batch.

    Args:
        trainloader / validationloader: torch DataLoaders.
        criterion: loss function (stored on self).
        optimizer: torch optimizer (stored on self).
        epochs: number of passes over trainloader.
        val_per_batch: unused here; kept for interface compatibility.

    Returns:
        (trainlosses, testlosses): per-batch training loss and the
        corresponding averaged validation loss.
    """
    trainlosses = []
    testlosses = []
    self.criterion = criterion
    self.optimizer = optimizer
    for epoch in range(epochs):
        progbar = Progbar(target=len(trainloader) - 1)
        for batch, (data, target) in enumerate(trainloader):
            testloss = 0
            self.train()
            data = data.type(torch.FloatTensor)
            target = target.type(torch.FloatTensor)
            if self.cuda:
                data, target = data.cuda(), target.cuda()
            self.optimizer.zero_grad()
            output = self.forward(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            trainloss = loss.item()
            # full validation sweep after every batch (expensive by design)
            self.eval()
            with torch.no_grad():
                for data, target in validationloader:
                    data = data.type(torch.FloatTensor)
                    target = target.type(torch.FloatTensor)
                    if self.cuda:
                        data, target = data.cuda(), target.cuda()
                    ps = self.forward(data)
                    testloss += self.criterion(ps, target).item()
            # average validation loss over its batches
            testloss = testloss / len(validationloader)
            # FIX: `trainloss` is a single batch's loss, not a sum over the
            # epoch -- the previous `trainloss / len(trainloader)` shrank the
            # reported training loss by the number of batches.
            trainlosses.append(trainloss)
            testlosses.append(testloss)
            progbar.update(current=batch,
                           values=[('Epoch', epoch + 1),
                                   ('Training Loss', trainloss),
                                   ('Test Loss', testloss)])
    self.trainlosses = trainlosses
    self.testlosses = testlosses
    return (trainlosses, testlosses)
def fit(self, X_train, y_train, X_val, y_val, n_epochs=200, batch_size=32,
        return_history=False):
    """Mini-batch training loop; checkpoints whenever validation accuracy
    improves.

    Args:
        X_train, y_train: training inputs and one-hot labels.
        X_val, y_val: validation inputs and one-hot labels.
        n_epochs: number of passes over the training data.
        batch_size: mini-batch size.
        return_history: when True, return a dict of per-epoch metrics.

    Returns:
        history dict when return_history is True, otherwise None.
    """
    # labels as class indices for accuracy_score
    y_labels_val = np.argmax(y_val, axis=-1)
    y_labels_train = np.argmax(y_train, axis=-1)
    bar = Progbar(n_epochs)
    if return_history:
        history = {'train_loss': [], 'val_loss': [], 'train_acc': [],
                   'val_acc': [], 'best_val_acc': None,
                   'Model_Save_Prefix': self.save_prefix}
    best_val_acc = None
    for epoch in xrange(n_epochs):
        # Shuffle the training data
        index = np.arange(X_train.shape[0])
        np.random.shuffle(index)
        X = X_train[index]
        y = y_train[index]
        train_loss = 0.
        for ix in xrange(0, X.shape[0], batch_size):
            batch_x = X[ix:ix + batch_size]
            batch_y = y[ix:ix + batch_size]
            loss_train = self.train_batch(batch_x, batch_y)
            # weight each batch loss by its size (last batch may be smaller)
            train_loss += loss_train * batch_x.shape[0]
        train_loss /= X.shape[0]
        train_acc = accuracy_score(y_labels_train, self.predict(X_train))
        # Computing Validation Metrics
        val_loss, _ = self.optimizer.loss(y_val, self.forward(X_val, test=True))
        val_acc = accuracy_score(y_labels_val, self.predict(X_val))
        if best_val_acc is None or val_acc > best_val_acc:
            # new best: checkpoint with accuracy and epoch in the filename
            best_val_acc = val_acc
            model_file = self.save_prefix + "acc_%.4f_epoch_%d" % (
                val_acc, epoch + 1)
            self.save_params(model_file)
        if return_history:
            history['train_loss'].append(train_loss)
            history['val_loss'].append(val_loss)
            history['train_acc'].append(train_acc)
            history['val_acc'].append(val_acc)
        bar.update(epoch + 1, values=[("train_loss", train_loss),
                                      ("val_loss", val_loss),
                                      ("train_acc", train_acc),
                                      ("val_acc", val_acc)])
    if return_history:
        history['best_val_acc'] = best_val_acc
        return history
def _valid(data_loader, model, criterion, optimizer, epoch, opt, is_train=False):
    """Run one pass over ``data_loader``; validates by default, trains when
    ``is_train`` is True.

    Returns:
        list of per-batch loss values.
    """
    progbar = Progbar(title='Validating', target=len(data_loader),
                      batch_size=opt.batch_size,
                      total_examples=len(data_loader.dataset))
    if is_train:
        model.train()
    else:
        model.eval()
    losses = []
    # Note that the data should be shuffled every time
    for i, batch in enumerate(data_loader):
        src = batch.src
        trg = batch.trg
        if torch.cuda.is_available():
            # NOTE(review): Tensor.cuda() is not in-place and the result is
            # discarded here -- confirm src/trg are already on the GPU
            src.cuda()
            trg.cuda()
        decoder_probs, _, _ = model.forward(src, trg, must_teacher_forcing=True)
        start_time = time.time()
        # shifted target: predict tokens 1..T from decoder outputs
        loss = criterion(decoder_probs.contiguous().view(-1, opt.vocab_size),
                         trg[:, 1:].contiguous().view(-1))
        print("--loss calculation --- %s" % (time.time() - start_time))
        start_time = time.time()
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            if opt.max_grad_norm > 0:
                # pre-1.0 torch spelling (clip_grad_norm_ in newer releases)
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              opt.max_grad_norm)
            optimizer.step()
        print("--backward function - %s seconds ---" % (time.time() - start_time))
        # loss.data[0] is the pre-0.4 PyTorch scalar accessor
        losses.append(loss.data[0])
        start_time = time.time()
        progbar.update(epoch, i, [('valid_loss', loss.data[0])])
        print("-progbar.update --- %s" % (time.time() - start_time))
    return losses
def train(self, trainset, devset, testset, batch_size=64, epochs=50,
          shuffle=True):
    """Full training loop with lr decay, early stopping, and best-model saving.

    Args:
        trainset / devset / testset: datasets of (words, labels) examples.
        batch_size: mini-batch size.
        epochs: maximum number of epochs.
        shuffle: shuffle the training set each epoch.
    """
    self.logger.info('Start training...')
    init_lr = self.cfg.lr  # initial learning rate, used for decay learning rate
    best_score = 0.0  # record the best score
    best_score_epoch = 1  # record the epoch of the best score obtained
    no_imprv_epoch = 0  # no improvement patience counter
    for epoch in range(self.start_epoch, epochs + 1):
        self.logger.info('Epoch %2d/%2d:' % (epoch, epochs))
        progbar = Progbar(
            target=(len(trainset) + batch_size - 1) // batch_size)  # number of batches
        if shuffle:
            np.random.shuffle(trainset)  # shuffle training dataset each epoch
        # training each epoch
        for i, (words, labels) in enumerate(batch_iter(trainset, batch_size)):
            feed_dict = self._get_feed_dict(words, labels, lr=self.cfg.lr,
                                            is_train=True)
            _, train_loss = self.sess.run([self.train_op, self.loss],
                                          feed_dict=feed_dict)
            progbar.update(i + 1, [("train loss", train_loss)])
        if devset is not None:
            self.evaluate(devset, batch_size)
        cur_score = self.evaluate(testset, batch_size, is_devset=False)
        # learning rate decay
        if self.cfg.decay_lr:
            self.cfg.lr = init_lr / (1 + self.cfg.lr_decay * epoch)
        # performs model saving and evaluating on test dataset
        if cur_score > best_score:
            no_imprv_epoch = 0
            self.save_session(epoch)
            best_score = cur_score
            best_score_epoch = epoch
            self.logger.info(
                ' -- new BEST score on TEST dataset: {:05.3f}'.format(
                    best_score))
        else:
            no_imprv_epoch += 1
            if no_imprv_epoch >= self.cfg.no_imprv_patience:
                self.logger.info(
                    'early stop at {}th epoch without improvement for {} epochs, BEST score: '
                    '{:05.3f} at epoch {}'.format(epoch, no_imprv_epoch,
                                                  best_score,
                                                  best_score_epoch))
                break
    self.logger.info('Training process done...')
def run_epoch(self, sess, src_train, src_dev, tags, target_train, target_dev,
              n_epoch_noimprove):
    """Jointly train on paired source/target batches for one epoch, then
    evaluate on target train and dev sets.

    Returns:
        (acc, p, r, f1) measured on target_dev.
    """
    nbatces = (len(target_train) + self.target_batch_size - 1) // self.target_batch_size
    prog = Progbar(target=nbatces)
    total_loss = 0
    # circular generators: the source stream repeats so it can be paired
    # with every target batch
    src = minibatches(src_train, self.src_batch_size, circle=True)
    target = minibatches(target_train, self.target_batch_size, circle=True)
    for i in range(nbatces):
        src_words, src_tags, _ = next(src)
        target_words, target_tags, _ = next(target)
        labels = src_tags + target_tags
        feed_dict, _ = self.get_feed_dict(src_words, labels, target_words,
                                          self.args.learning_rate,
                                          self.args.dropout,
                                          self.src_batch_size,
                                          is_training=True)
        if self.args.penalty_ratio > 0:
            _, src_crf_loss, target_crf_loss, penalty_loss, loss = sess.run(
                [self.train_op, self.src_crf_loss, self.target_crf_loss,
                 self.penalty_loss, self.loss], feed_dict=feed_dict)
            # NOTE(review): the bare try/except tolerates `loss` arriving as
            # either an array (loss[0]) or a scalar -- consider narrowing to
            # (TypeError, IndexError)
            try:
                prog.update(i + 1, [("train loss", loss[0]),
                                    ("src crf", src_crf_loss),
                                    ("target crf", target_crf_loss),
                                    ("{} loss".format(self.args.penalty),
                                     penalty_loss)])
            except:
                prog.update(i + 1, [("train loss", loss),
                                    ("src crf", src_crf_loss),
                                    ("target crf", target_crf_loss),
                                    ("{} loss".format(self.args.penalty),
                                     penalty_loss)])
        else:
            _, src_crf_loss, target_crf_loss, loss = sess.run(
                [self.train_op, self.src_crf_loss, self.target_crf_loss,
                 self.loss], feed_dict=feed_dict)
            try:
                prog.update(i + 1, [("train loss", loss[0]),
                                    ("src crf", src_crf_loss),
                                    ("target crf", target_crf_loss)])
            except:
                prog.update(i + 1, [("train loss", loss),
                                    ("src crf", src_crf_loss),
                                    ("target crf", target_crf_loss)])
        total_loss += loss
    # average epoch loss appended to the run's history
    self.info['loss'] += [total_loss / nbatces]
    acc, p, r, f1 = self.run_evaluate(sess, target_train, tags, target='target')
    self.info['dev'].append((acc, p, r, f1))
    self.logger.critical(
        "target train acc {:04.2f} f1 {:04.2f} p {:04.2f} r {:04.2f}".format(
            100 * acc, 100 * f1, 100 * p, 100 * r))
    acc, p, r, f1 = self.run_evaluate(sess, target_dev, tags, target='target')
    self.info['dev'].append((acc, p, r, f1))
    self.logger.info(
        "dev acc {:04.2f} f1 {:04.2f} p {:04.2f} r {:04.2f}".format(
            100 * acc, 100 * f1, 100 * p, 100 * r))
    return acc, p, r, f1
def train_epoch(self, train_set, valid_data, epoch):
    """One pass over ``train_set``: a train summary is written every batch and
    a validation summary every 100 batches."""
    batches_per_epoch = len(train_set)
    progress = Progbar(target=batches_per_epoch)
    for batch_idx, batch_data in enumerate(train_set):
        feed = self._get_feed_dict(batch_data, is_train=True,
                                   keep_prob=self.cfg["keep_prob"],
                                   lr=self.cfg["lr"])
        fetches = [self.train_op, self.loss, self.summary]
        _, batch_loss, batch_summary = self.sess.run(fetches, feed_dict=feed)
        # global step spans epochs (epoch is 1-based)
        global_step = (epoch - 1) * batches_per_epoch + batch_idx + 1
        progress.update(batch_idx + 1, [("Global Step", int(global_step)),
                                        ("Train Loss", batch_loss)])
        self.train_writer.add_summary(batch_summary, global_step)
        if batch_idx % 100 != 0:
            continue
        # periodic validation summary (feed dict built without train flags)
        valid_summary = self.sess.run(
            self.summary, feed_dict=self._get_feed_dict(valid_data))
        self.test_writer.add_summary(valid_summary, global_step)
def run_epoch(self, sess, batch_size, training_set, validation_set, dropout):
    """One training pass plus one validation pass, printing aggregate stats."""
    X_tr, Y_tr = training_set
    X_val, Y_val = validation_set
    prog = Progbar(target=int(math.ceil(X_tr.shape[0] / batch_size)))
    resize = (224, 224)  # inputs are resized to 224x224 for the network
    for i, (train_x, train_y) in enumerate(
            get_minibatches(X_tr, Y_tr, batch_size, True, resize)):
        loss, corr = self.train_on_batch(sess, train_x, train_y, True, dropout)
        prog.update(i + 1, [('train_loss', loss),
                            ('train_acc', np.sum(corr) / train_x.shape[0])])
    prog = Progbar(target=int(math.ceil(X_val.shape[0] / batch_size)))
    val_loss, val_corr = 0, 0
    for i, (val_x, val_y) in enumerate(
            get_minibatches(X_val, Y_val, batch_size, False, resize)):
        # train_on_batch with the training flag False -- presumably
        # forward-only; TODO(review) confirm it does not update weights
        loss, corr = self.train_on_batch(sess, val_x, val_y, False)
        val_loss += loss
        val_corr += np.sum(corr)
        prog.update(i + 1, [('val_loss', loss),
                            ('val_acc', np.sum(corr) / val_x.shape[0])])
    # NOTE(review): val_loss is a sum of per-batch losses divided by the
    # sample count -- verify that normalization is the intended one
    print("Validation loss = {0:.3g} and accuracy = {1:.3g}".format(
        val_loss / X_val.shape[0], val_corr / X_val.shape[0]))
def train(self, train_set, dev_set, test_set):
    """Training driver: per-epoch training with TensorBoard summaries,
    dev/test evaluation, two lr-decay schemes, early stopping, and
    checkpointing whenever the test f1 improves."""
    self.logger.info('Start training...')
    best_score = 0  # store the current best f1 score on dev_set, updated if new best one is derived
    no_imprv_epoch_count = 0  # count the continuous no improvement epochs
    init_lr = self.cfg.lr  # initial learning rate
    num_batches = (len(train_set) + self.cfg.batch_size - 1) // self.cfg.batch_size
    self.add_summary()
    for epoch in range(1, self.cfg.epochs + 1):  # run each epoch
        self.logger.info('Epoch %2d/%2d:' % (epoch, self.cfg.epochs))
        prog = Progbar(target=num_batches)  # nbatches
        for i, (words, labels) in enumerate(
                batch_iter(train_set, self.cfg.batch_size)):
            feed_dict, _ = self._get_feed_dict(words, True, labels,
                                               self.cfg.lr,
                                               self.cfg.keep_prob)
            _, train_loss, summary = self.sess.run(
                [self.train_op, self.loss, self.merged], feed_dict=feed_dict)
            prog.update(i + 1, [("train loss", train_loss)])
            # add summary
            if i % 10 == 0:
                self.file_writer.add_summary(summary,
                                             (epoch - 1) * num_batches + i)
        self.evaluate(dev_set)  # evaluate dev_set
        metrics = self.evaluate(test_set, eval_dev=False)  # evaluate test_set
        cur_score = metrics['f1']
        # learning rate decay method
        if self.cfg.lr_decay_method == 1:
            self.cfg.lr *= self.cfg.lr_decay  # exponential decay
        else:
            self.cfg.lr = init_lr / (1 + self.cfg.lr_decay_rate * epoch)  # inverse-time decay
        if cur_score > best_score:  # performs early stop and parameters save
            no_imprv_epoch_count = 0
            self.save_session(epoch)  # save model with a new best score is obtained
            best_score = cur_score
            self.logger.info(
                ' -- new BEST score: {:04.2f}'.format(best_score))
        else:
            no_imprv_epoch_count += 1
            if no_imprv_epoch_count >= self.cfg.no_imprv_threshold:
                self.logger.info(
                    'early stop at {}th epoch without improvement for {} epochs, BEST score: {:04.2f}'
                    .format(epoch, no_imprv_epoch_count, best_score))
                self.save_session(epoch)  # save the last one
                break
    self.logger.info('Training process done...')
    self.file_writer.close()
def evaluate_greedy(model, data_loader, test_examples, opt):
    """Greedy-decode each test example and log the predicted vs. reference
    text (assumes one example per batch)."""
    model.eval()
    logging.info(
        '====================== Checking GPU Availability ========================='
    )
    if torch.cuda.is_available():
        logging.info('Running on GPU!')
        model.cuda()
    else:
        logging.info('Running on CPU!')
    logging.info(
        '====================== Start Predicting =========================')
    progbar = Progbar(title='Testing', target=len(data_loader),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset))
    '''
    Note here each batch only contains one data example, thus decoder_probs is flattened
    '''
    for i, (batch, example) in enumerate(zip(data_loader, test_examples)):
        src = batch.src
        logging.info('====================== %d =========================' %
                     (i + 1))
        logging.info('\nSource text: \n %s\n' %
                     (' '.join([opt.id2word[wi] for wi in src.data.numpy()[0]])))
        if torch.cuda.is_available():
            # NOTE(review): Tensor.cuda() is not in-place and the result is
            # discarded here -- confirm this is intended
            src.cuda()
        # trg = Variable(torch.from_numpy(np.zeros((src.size(0), opt.max_sent_length), dtype='int64')))
        # decoding starts from a row of BOS tokens
        trg = Variable(
            torch.LongTensor([[opt.word2id[pykp.io.BOS_WORD]] *
                              opt.max_sent_length]))
        max_words_pred = model.greedy_predict(src, trg)
        progbar.update(None, i, [])
        sentence_pred = [opt.id2word[x] for x in max_words_pred]
        sentence_real = example['trg_str']
        if '</s>' in sentence_real:
            # truncate the prediction at the reference's end-of-sequence mark
            index = sentence_real.index('</s>')
            sentence_pred = sentence_pred[:index]
        logging.info('\t\tPredicted : %s ' % (' '.join(sentence_pred)))
        logging.info('\t\tReal : %s ' % (sentence_real))
def predict(self, sess, test, id_to_tag, id_to_word):
    """Predict tags for every sentence in ``test`` and write them to
    ``args.predict_out`` as '<token>\\t<tag>' lines, with a blank line
    separating sentences."""
    batch_size = self.args.batch_size
    n_batches = (len(test) + batch_size - 1) // batch_size
    progress = Progbar(target=n_batches)
    with open(self.args.predict_out, 'w+', encoding='utf8') as outfile:
        batches = minibatches_evaluate(test, batch_size)
        for batch_no, (words, target_words, true_words) in enumerate(batches):
            labels_pred, sequence_lengths = self.predict_batch(sess, words)
            rows = zip(words, true_words, labels_pred, sequence_lengths)
            for word, sent_tokens, sent_preds, sent_len in rows:
                # drop padding beyond the true sentence length
                tokens = sent_tokens[:sent_len]
                pred_tags = sent_preds[:sent_len]
                for item, tag in zip(tokens, pred_tags):
                    outfile.write(item + '\t' + id_to_tag[tag] + '\n')
                outfile.write('\n')
            progress.update(batch_no + 1)
def train_epoch(self, train_set, valid_data, epoch):
    """One training pass; tracks a running perplexity and logs summaries."""
    num_batches = len(train_set)
    prog = Progbar(target=num_batches)
    total_cost, total_samples = 0, 0
    for i, batch in enumerate(train_set):
        feed_dict = self._get_feed_dict(batch, is_train=True,
                                        keep_prob=self.cfg["keep_prob"],
                                        lr=self.cfg["lr"])
        _, train_loss, summary = self.sess.run(
            [self.train_op, self.loss, self.summary], feed_dict=feed_dict)
        cur_step = (epoch - 1) * num_batches + (i + 1)
        total_cost += train_loss
        total_samples += np.array(batch["words"]).shape[0]
        # NOTE(review): perplexity = exp(sum of batch losses / sample count);
        # confirm self.loss is a per-batch *sum* over samples, otherwise the
        # units are mixed
        prog.update(i + 1, [("Global Step", int(cur_step)),
                            ("Train Loss", train_loss),
                            ("Perplexity", np.exp(total_cost / total_samples))])
        self.train_writer.add_summary(summary, cur_step)
        if i % 100 == 0:
            # periodic validation summary
            valid_feed_dict = self._get_feed_dict(valid_data)
            valid_summary = self.sess.run(self.summary,
                                          feed_dict=valid_feed_dict)
            self.test_writer.add_summary(valid_summary, cur_step)
def _valid(data_loader, model, criterion, optimizer, epoch, opt, is_train=False):
    """Run one pass over ``data_loader``; validates by default, trains when
    ``is_train`` is True. Returns the list of per-batch losses."""
    progbar = Progbar(title='Validating', target=len(data_loader),
                      batch_size=opt.batch_size,
                      total_examples=len(data_loader.dataset))
    if is_train:
        model.train()
    else:
        model.eval()
    losses = []
    # Note that the data should be shuffled every time
    for i, batch in enumerate(data_loader):
        src = batch.src
        trg = batch.trg
        if torch.cuda.is_available():
            # NOTE(review): Tensor.cuda() is not in-place; the results are
            # discarded here -- confirm src/trg are already on the GPU
            src.cuda()
            trg.cuda()
        decoder_probs, _, _ = model.forward(src, trg,
                                            must_teacher_forcing=True)
        start_time = time.time()
        # shifted target: predict tokens 1..T from decoder outputs
        loss = criterion(
            decoder_probs.contiguous().view(-1, opt.vocab_size),
            trg[:, 1:].contiguous().view(-1)
        )
        print("--loss calculation --- %s" % (time.time() - start_time))
        start_time = time.time()
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            if opt.max_grad_norm > 0:
                # pre-1.0 torch spelling (clip_grad_norm_ in newer releases)
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              opt.max_grad_norm)
            optimizer.step()
        print("--backward function - %s seconds ---" % (time.time() - start_time))
        # loss.data[0] is the pre-0.4 PyTorch scalar accessor
        losses.append(loss.data[0])
        start_time = time.time()
        progbar.update(epoch, i, [('valid_loss', loss.data[0])])
        print("-progbar.update --- %s" % (time.time() - start_time))
    return losses
def train(train_loader, model, criterion, optimizer):
    """Run one training epoch over ``train_loader``, showing loss and top-1
    accuracy on a progress bar."""
    progress = Progbar(len(train_loader))
    model.train()
    for step, (images, target) in enumerate(train_loader):
        images, target = images.cuda(), target.cuda()
        # forward pass
        output = model(images)
        loss = criterion(output, target)
        # top-1 accuracy for the progress bar
        acc1 = accuracy(output, target, topk=(1, ))
        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progress.update(step + 1, [('loss', loss.item()),
                                   ('acc', acc1[0].cpu().numpy())])
def train_epoch(self, X, y, show_bar=True):
    """Run one epoch of per-sentence Adam updates over (X, y)."""
    # NOTE(review): a fresh optimizer each epoch resets Adam's moment
    # estimates -- confirm this is intended
    optimizer = optim.Adam(self.parameters())
    if show_bar:
        bar = Progbar(len(X))
    for ix, (elem, tags) in enumerate(zip(X, y)):
        self.zero_grad()
        sentence, feature_vector, sentence_markers = self.get_sentence_feature_vector(elem)
        if self.GPU:
            targets = torch.LongTensor(tags).cuda()
        else:
            targets = torch.LongTensor(tags)
        neg_log_likelihood = self.neg_log_likelihood(sentence, feature_vector,
                                                     targets)
        neg_log_likelihood.backward()
        optimizer.step()
        if show_bar:
            bar.update(ix + 1)
    if show_bar:
        # newline after the progress bar
        print ''
        sys.stdout.flush()
def train(self, train_set, dev_set, test_set, start_epoch=1, shuffle=True):
    """Training driver with optional shuffling, inverse-time lr decay, early
    stopping, and checkpointing whenever the test f1 improves."""
    self.logger.info('Start training...')
    best_score = 0  # store the current best f1 score on dev_set, updated if new best one is derived
    no_imprv_epoch_count = 0  # count the continuous no improvement epochs
    init_lr = self.cfg.lr  # initial learning rate
    for epoch in range(start_epoch, self.cfg.epochs + 1):  # run each epoch
        self.logger.info('Epoch %2d/%2d:' % (epoch, self.cfg.epochs))
        prog = Progbar(target=(len(train_set) + self.cfg.batch_size - 1) //
                       self.cfg.batch_size)  # nbatches
        if shuffle:
            np.random.shuffle(train_set)  # shuffle training dataset every epoch
        for i, (words, labels) in enumerate(
                batch_iter(train_set, self.cfg.batch_size)):
            feed_dict, _ = self._get_feed_dict(words, True, labels,
                                               self.cfg.lr,
                                               self.cfg.keep_prob)
            _, train_loss = self.sess.run([self.train_op, self.loss],
                                          feed_dict=feed_dict)
            prog.update(i + 1, [("train loss", train_loss)])
        self.evaluate(dev_set)  # evaluate dev_set
        metrics = self.evaluate(test_set, eval_dev=False)  # evaluate test_set
        cur_score = metrics['f1']
        # learning rate decay
        if self.cfg.decay_lr:
            self.cfg.lr = init_lr / (1 + self.cfg.lr_decay_rate * epoch)
        if cur_score > best_score:  # performs early stop and parameters save
            no_imprv_epoch_count = 0
            self.save_session(epoch)  # save model with a new best score is obtained
            best_score = cur_score
            self.logger.info(
                ' -- new BEST score: {:04.2f}\n'.format(best_score))
        else:
            no_imprv_epoch_count += 1
            if no_imprv_epoch_count >= self.cfg.no_imprv_threshold:
                self.logger.info(
                    'early stop at {}th epoch without improvement for {} epochs, BEST score: {:04.2f}'
                    .format(epoch, no_imprv_epoch_count, best_score))
                # self.save_session(epoch)  # save the last one
                break
    self.logger.info('Training process done...')
def __iter__(self):
    """Yield tokenized sentences from the corpus; when a model is attached
    (training has started), count epochs and save a checkpoint after each
    full pass (intermediate checkpoints are tagged with the epoch number)."""
    if self.model is not None:
        # Training started
        self.epoch_number += 1
        print 'STARTING EPOCH : (%d/%d)' % (self.epoch_number, self.n_epochs)
        sys.stdout.flush()
    self.bar = Progbar(len(self.data))
    for idx, line in enumerate(self.data):
        self.bar.update(idx + 1)
        line = line.lower() if self.lowercase else line
        yield self.l_en.tokenize_sent(line)
    if self.model is not None:
        if self.epoch_number != self.n_epochs:
            SAVE_FILE_NAME = self.model_prefix + '_iter_' + str(
                self.epoch_number) + '.model'
        else:
            # Last Epoch
            SAVE_FILE_NAME = self.model_prefix + '.model'
        self.model.save(SAVE_FILE_NAME)
def predict(self, sess, test_set, batch_size):
    '''
    The step to predict the image
    '''
    X_te, Y_te = test_set
    prog = Progbar(target=int(math.ceil(X_te.shape[0] / batch_size)))
    probs = []
    for i, idx in enumerate(get_indicies(X_te.shape[0], batch_size, False)):
        # NOTE(review): `resize` is not defined in this function --
        # presumably a module-level constant such as (224, 224); confirm
        te_x, _ = minibatch(X_te, Y_te, idx, resize)
        feed = self.create_feed_dict(te_x, labels_batch=None,
                                     is_training=False)
        # NOTE(review): tf.nn.softmax(self.pred) creates a new graph op on
        # every iteration -- consider hoisting it out of the loop
        probs.append(sess.run(tf.nn.softmax(self.pred), feed))
        prog.update(i + 1, None)
    # stack per-batch probabilities into one (num_examples, num_classes) array
    probs = np.concatenate(probs)
    preds = np.argmax(probs, axis=1)
    return preds, probs
def train_with_partial_data(self, X_train, y_train, X_unlabeled, y_unlabeled,
                            mode='codl'):
    """Semi-supervised training over partially labeled data.

    mode='codl': constraint-driven learning -- iterate predict/train and blend
    the new parameters with the originals using factor gamma.
    mode='em': hard EM -- E-step predicts labels for the unlabeled data,
    M-step retrains on labeled + predicted data.
    """
    mode = mode.lower()
    assert mode in set(['codl', 'em']), "Found unknown mode %s" % (mode)
    NUM_ITERATIONS = 5
    if mode == 'codl':
        # CoDL
        gamma = 0.9  # interpolation weight toward the original parameters
        original_params = copy.deepcopy(self.state_dict())
        original_rho = self.constraint_penalty['AT_LEAST_ONE_ATTR']
        data_X = X_unlabeled
        # NOTE(review): the inner training loop below reuses loop variable
        # `ix`, shadowing the outer iteration counter
        for ix in xrange(NUM_ITERATIONS):
            print '\tStarting CoDL Iteration : %d / %d' % (ix + 1, NUM_ITERATIONS)
            print '\t Making %d Predictions ' % (len(X_unlabeled))
            y_predictions = self.predict(X_unlabeled, mode='ccm',
                                         partial_labels=y_unlabeled,
                                         use_bar=True)
            if type(y_predictions) != list:
                y_predictions = [y_predictions]
            data_y = y_predictions
            print '\t Training on %d Observations ' % (len(data_X))
            self.set_constraint_penalties(data_X, data_y)
            bar = Progbar(NUM_ITERATIONS)
            for ix in xrange(NUM_ITERATIONS):
                self.train_epoch(data_X, data_y, show_bar=False)
                bar.update(ix + 1)
            # Now update the parameters
            params = self.state_dict()
            # blend constraint penalty and weights toward the originals
            self.constraint_penalty['AT_LEAST_ONE_ATTR'] = (
                gamma * original_rho) + (
                (1. - gamma) * self.constraint_penalty['AT_LEAST_ONE_ATTR'])
            for w in params:
                if w in original_params:
                    params[w] = (gamma * original_params[w]) + (
                        (1. - gamma) * params[w])
            self.load_state_dict(params)
    else:
        # EM
        data_X = X_train + X_unlabeled
        for ix in xrange(NUM_ITERATIONS):
            print '\tStarting EM Iteration : %d / %d' % (ix + 1, NUM_ITERATIONS)
            # 1.1 E Step : Make predictions
            print '\t Making %d Predictions ' % (len(X_unlabeled))
            y_predictions = self.predict(X_unlabeled, mode='ccm',
                                         partial_labels=y_unlabeled,
                                         use_bar=True)
            if type(y_predictions) != list:
                y_predictions = [y_predictions]
            data_y = y_train + y_predictions
            print '\t Training on %d Observations ' % (len(data_X))
            # 1.2 M Step : Maximize log likelihood
            # 1.2.1 Update the constraints
            self.set_constraint_penalties(data_X, data_y)
            # 1.2.2 Update the parameters
            bar = Progbar(NUM_ITERATIONS)
            for ix in xrange(NUM_ITERATIONS):
                self.train_epoch(data_X, data_y, show_bar=False)
                bar.update(ix + 1)
def evaluate_greedy(model, data_loader, test_examples, opt):
    """Greedy-decode each test example and log the predicted vs. reference
    text (assumes one example per batch)."""
    model.eval()
    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        logging.info('Running on GPU!')
        model.cuda()
    else:
        logging.info('Running on CPU!')
    logging.info('====================== Start Predicting =========================')
    progbar = Progbar(title='Testing', target=len(data_loader),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset))
    '''
    Note here each batch only contains one data example, thus decoder_probs is flattened
    '''
    for i, (batch, example) in enumerate(zip(data_loader, test_examples)):
        src = batch.src
        logging.info('====================== %d =========================' % (i + 1))
        logging.info('\nSource text: \n %s\n' % (' '.join(
            [opt.id2word[wi] for wi in src.data.numpy()[0]])))
        if torch.cuda.is_available():
            # NOTE(review): Tensor.cuda() is not in-place and the result is
            # discarded here -- confirm this is intended
            src.cuda()
        # trg = Variable(torch.from_numpy(np.zeros((src.size(0), opt.max_sent_length), dtype='int64')))
        # decoding starts from a row of BOS tokens
        trg = Variable(torch.LongTensor(
            [[opt.word2id[pykp.io.BOS_WORD]] * opt.max_sent_length]))
        max_words_pred = model.greedy_predict(src, trg)
        progbar.update(None, i, [])
        sentence_pred = [opt.id2word[x] for x in max_words_pred]
        sentence_real = example['trg_str']
        if '</s>' in sentence_real:
            # truncate the prediction at the reference's end-of-sequence mark
            index = sentence_real.index('</s>')
            sentence_pred = sentence_pred[:index]
        logging.info('\t\tPredicted : %s ' % (' '.join(sentence_pred)))
        logging.info('\t\tReal : %s ' % (sentence_real))
def _valid_error(data_loader, model, criterion, epoch, opt):
    """Compute validation loss over ``data_loader`` (no parameter updates).

    Returns:
        list of per-batch loss values.
    """
    progbar = Progbar(title='Validating', target=len(data_loader),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset))
    model.eval()
    losses = []
    # Note that the data should be shuffled every time
    for i, batch in enumerate(data_loader):
        # if i >= 100:
        #     break
        one2many_batch, one2one_batch = batch
        src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
        if torch.cuda.is_available():
            src = src.cuda()
            trg = trg.cuda()
            trg_target = trg_target.cuda()
            trg_copy_target = trg_copy_target.cuda()
            src_ext = src_ext.cuda()
        decoder_log_probs, _, _ = model.forward(src, trg, src_ext)
        if not opt.copy_attention:
            loss = criterion(
                decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                trg_target.contiguous().view(-1))
        else:
            # copy mechanism extends the output vocab by max_unk_words slots
            loss = criterion(
                decoder_log_probs.contiguous().view(
                    -1, opt.vocab_size + opt.max_unk_words),
                trg_copy_target.contiguous().view(-1))
        # loss.data[0] is the pre-0.4 PyTorch scalar accessor
        losses.append(loss.data[0])
        progbar.update(epoch, i, [('valid_loss', loss.data[0]),
                                  ('PPL', loss.data[0])])
    return losses
def train(self, train_set, test_set, epochs, shuffle=True):
    """Main training loop: per-epoch lr decay, checkpointing whenever test
    loss improves, and a no-improvement tolerance for early stopping."""
    self.cfg.logger.info("Start training...")
    self._add_summary()
    num_batches = len(train_set)
    cur_step = 0
    cur_tolerance = 0
    cur_test_loss = float("inf")  # best (lowest) test loss so far
    for epoch in range(self.start_epoch, epochs + 1):
        if shuffle:
            random.shuffle(train_set)
        self.cfg.logger.info("Epoch {} / {}:".format(epoch, epochs))
        prog = Progbar(target=num_batches)  # nbatches
        for i, batch_data in enumerate(train_set):
            cur_step += 1
            feed_dict = self._get_feed_dict(batch_data,
                                            keep_prob=self.cfg.keep_prob,
                                            lr=self.cfg.lr)
            _, loss, summary = self.sess.run(
                [self.train_op, self.loss, self.merged_summaries],
                feed_dict=feed_dict)
            # guard math.exp overflow for very large losses
            perplexity = math.exp(
                float(loss)) if loss < 300 else float("inf")
            prog.update(i + 1, [("Global Step", int(cur_step)),
                                ("Train Loss", loss),
                                ("Perplexity", perplexity)])
            if cur_step % 10 == 0:
                self.summary_writer.add_summary(summary, cur_step)
        if self.cfg.use_lr_decay:
            # simple learning rate decay, performs each epoch
            self.cfg.lr *= self.cfg.lr_decay
        test_loss = self.evaluate(test_set, epoch)
        if test_loss <= cur_test_loss:
            self.save_session(epoch)  # save model for each epoch
            cur_test_loss = test_loss
        else:
            cur_tolerance += 1
            if cur_tolerance > self.cfg.no_imprv_tolerance:
                break
    self.cfg.logger.info(
        "Training process finished. Total trained steps: {}".format(
            cur_step))
def train(train_loader, model, criterion, optimizer, epoch, ):
    """Run one training epoch of `model` over `train_loader`.

    Tracks data-loading and batch times in AverageMeters and reports the
    per-batch loss and top-1 accuracy through a progress bar.
    """
    timer_batch = AverageMeter('Time', ':6.3f')
    timer_data = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc_meter = AverageMeter('Acc@1', ':6.2f')
    bar = Progbar(len(train_loader))

    # switch to train mode
    model.train()

    tic = time.time()
    for step, (inputs, labels) in enumerate(train_loader):
        # time spent waiting on the data loader
        timer_data.update(time.time() - tic)

        inputs = inputs.cuda()
        labels = labels.cuda()

        # forward pass and loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # top-1 accuracy for reporting only
        acc1 = accuracy(outputs, labels, topk=(1,))

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total time for this batch, then restart the clock
        timer_batch.update(time.time() - tic)
        tic = time.time()

        bar.update(step + 1, [('loss', loss.item()),
                              ('acc', acc1[0].cpu().numpy())])
def run_epoch(self, train, dev, train_eval, epoch): """Performs one complete pass over the train set and evaluate on dev Args: train: dataset that yields tuple of sentences, tags dev: dataset epoch: (int) index of the current epoch Returns: f1: (python float), score to select model on, higher is better """ # progbar stuff for logging batch_size = self.config.batch_size nbatches = (len(train) + batch_size - 1) // batch_size prog = Progbar(target=nbatches) # iterate over dataset for i, (words, labels) in enumerate(minibatches(train, batch_size)): fd, _ = self.get_feed_dict(True, words, labels, lr=self.config.lr) _, train_loss = self.sess.run([self.train_op, self.loss], feed_dict=fd) prog.update(i + 1, values=[("train loss", train_loss)]) acc_train = self.evaluate(train_eval) acc_test = self.evaluate(dev) prog.update(i + 1, epoch, [("train loss", train_loss)], exact=[("dev acc", acc_test), ("train acc", acc_train), ("lr", self.config.lr)]) return acc_train, acc_test, train_loss
def validate(self):
    """Run one full pass over the validation set, reporting discriminator /
    generator losses and accuracy through a progress bar."""
    print('\n\nValidating epoch: %d' % self.epoch)

    sample_count = len(self.dataset_val)
    batches = self.dataset_val.generator(self.options.batch_size)
    bar = Progbar(sample_count, width=25)

    for batch_rgb in batches:
        feed = {self.input_rgb: batch_rgb}
        # NOTE(review): the outputs of this run are discarded and eval_outputs
        # below re-evaluates with the same feed — confirm whether this first
        # run drives update ops (e.g. a streaming accuracy) or is redundant.
        self.sess.run([self.dis_loss, self.gen_loss, self.accuracy],
                      feed_dict=feed)
        (lossD, lossD_fake, lossD_real,
         lossG, lossG_l1, lossG_gan, acc, step) = self.eval_outputs(feed_dic=feed)
        bar.add(len(batch_rgb), values=[("D loss", lossD),
                                        ("D fake", lossD_fake),
                                        ("D real", lossD_real),
                                        ("G loss", lossG),
                                        ("G L1", lossG_l1),
                                        ("G gan", lossG_gan),
                                        ("accuracy", acc)])
    print('\n')
def _valid_error(data_loader, model, criterion, epoch, opt):
    """Compute and return the per-batch validation losses of `model` over
    `data_loader` (one2one batches only)."""
    bar = Progbar(title='Validating', target=len(data_loader),
                  batch_size=data_loader.batch_size,
                  total_examples=len(data_loader.dataset))
    model.eval()
    batch_losses = []
    # Note that the data should be shuffled every time
    use_gpu = torch.cuda.is_available()
    for step, (one2many_batch, one2one_batch) in enumerate(data_loader):
        src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
        if use_gpu:
            src, trg, trg_target, trg_copy_target, src_ext = (
                src.cuda(), trg.cuda(), trg_target.cuda(),
                trg_copy_target.cuda(), src_ext.cuda())

        decoder_log_probs, _, _ = model.forward(src, trg, src_ext)

        # copy model scores an extended vocabulary (base vocab + OOV slots)
        if opt.copy_model:
            flat_logits = decoder_log_probs.contiguous().view(
                -1, opt.vocab_size + opt.max_unk_words)
            flat_targets = trg_copy_target.contiguous().view(-1)
        else:
            flat_logits = decoder_log_probs.contiguous().view(-1, opt.vocab_size)
            flat_targets = trg_target.contiguous().view(-1)
        loss = criterion(flat_logits, flat_targets)

        batch_losses.append(loss.data[0])
        bar.update(epoch, step, [('valid_loss', loss.data[0]),
                                 ('PPL', loss.data[0])])
    return batch_losses
def train(conf):
    """Train a VRNN on the loaded dataset for `conf.train_epoch` epochs,
    sampling an image after each epoch and checkpointing periodically.

    Fixes over the previous revision:
      * progress-bar target follows `len(train_loader)` instead of the
        hard-coded 117, so any dataset/batch size works;
      * checkpoints are named after the epoch they actually contain
        (was `ep + 1`, an off-by-one in the file name);
      * `conf.restore` tested by truthiness instead of `== True`;
      * progress updates are 1-based like the other trainers in this file.

    Args:
        conf: config namespace with x_dim/h_dim/z_dim, restore,
            checkpoint_path, train_epoch and save_every.
    """
    train_loader, test_loader = load_dataset(512)
    net = VRNN(conf.x_dim, conf.h_dim, conf.z_dim)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.cuda.manual_seed_all(112858)
    net.to(device)
    net = torch.nn.DataParallel(net, device_ids=[0, 1])
    if conf.restore:
        net.load_state_dict(
            torch.load(conf.checkpoint_path, map_location='cuda:0'))
        print('Restore model from ' + conf.checkpoint_path)
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    for ep in range(1, conf.train_epoch + 1):
        # GENERALIZED: was Progbar(target=117), valid only for one specific
        # dataset size / batch size combination.
        prog = Progbar(target=len(train_loader))
        print("At epoch:{}".format(str(ep)))
        for i, (data, target) in enumerate(train_loader):
            data = data.squeeze(1)
            data = (data / 255).to(device)  # normalize pixel values to [0, 1]
            package = net(data)
            loss = Loss(package, data)
            net.zero_grad()
            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(net.parameters(), 5)
            optimizer.step()
            prog.update(i + 1, exact=[("Training Loss", loss.item())])
        # sample one image from the model after each epoch
        with torch.no_grad():
            x_decoded = net.module.sampling(conf.x_dim, device)
            x_decoded = x_decoded.cpu().numpy()
            digit = x_decoded.reshape(conf.x_dim, conf.x_dim)
            plt.imshow(digit, cmap='Greys_r')
            plt.pause(1e-6)
        if ep % conf.save_every == 0:
            # BUGFIX: checkpoint was saved as Epoch_(ep+1) although it holds
            # the weights of epoch ep.
            torch.save(net.state_dict(),
                       '../checkpoint/Epoch_' + str(ep) + '.pth')
def train(self, trainset, devset, testset, batch_size=64, epochs=50, shuffle=True):
    """Train the model with per-epoch evaluation, lr decay and early stopping.

    Args:
        trainset: list of (words, labels) examples; shuffled in place each
            epoch when `shuffle` is True.
        devset: optional development set, evaluated every epoch (its score is
            not used for model selection here).
        testset: test set; its score drives saving and early stopping.
        batch_size: examples per mini-batch.
        epochs: maximum epoch index (runs from `self.start_epoch` inclusive).
        shuffle: whether to shuffle `trainset` at the start of every epoch.
    """
    self.logger.info('Start training...')
    init_lr = self.cfg.lr  # initial learning rate, used for decay learning rate
    best_score = 0.0  # record the best score
    best_score_epoch = 1  # record the epoch of the best score obtained
    no_imprv_epoch = 0  # no improvement patience counter
    for epoch in range(self.start_epoch, epochs + 1):
        self.logger.info('Epoch %2d/%2d:' % (epoch, epochs))
        progbar = Progbar(target=(len(trainset) + batch_size - 1) // batch_size)  # number of batches
        if shuffle:
            np.random.shuffle(trainset)  # shuffle training dataset each epoch
        # training each epoch
        for i, (words, labels) in enumerate(batch_iter(trainset, batch_size)):
            feed_dict = self._get_feed_dict(words, labels, lr=self.cfg.lr, is_train=True)
            _, train_loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)
            progbar.update(i + 1, [("train loss", train_loss)])
        if devset is not None:
            # dev score is computed (and presumably logged by evaluate) but
            # intentionally not used for selection — TODO confirm
            self.evaluate(devset, batch_size)
        cur_score = self.evaluate(testset, batch_size, is_devset=False)
        # learning rate decay
        if self.cfg.decay_lr:
            self.cfg.lr = init_lr / (1 + self.cfg.lr_decay * epoch)
        # performs model saving and evaluating on test dataset
        if cur_score > best_score:
            no_imprv_epoch = 0
            self.save_session(epoch)
            best_score = cur_score
            best_score_epoch = epoch
            self.logger.info(' -- new BEST score on TEST dataset: {:05.3f}'.format(best_score))
        else:
            no_imprv_epoch += 1
            if no_imprv_epoch >= self.cfg.no_imprv_patience:
                self.logger.info('early stop at {}th epoch without improvement for {} epochs, BEST score: '
                                 '{:05.3f} at epoch {}'.format(epoch, no_imprv_epoch, best_score, best_score_epoch))
                break
    self.logger.info('Training process done...')
def train_model(model, optimizer, criterion, training_data_loader, validation_data_loader, opt):
    """Optimize `model` on `training_data_loader`, periodically sampling
    predictions, validating, plotting learning curves and checkpointing.

    Fixes over the previous revision:
      * `stop_increasing` is now initialized before the first validation round
        — it was previously incremented without ever being defined, raising
        NameError at the first `run_valid_every` checkpoint;
      * `src`/`trg` are rebound to the result of `.cuda()` — `Tensor.cuda()`
        returns a copy and is NOT in-place, so the old calls had no effect.

    Args:
        model: seq2seq model; wrapped in DataParallel when CUDA is available.
        optimizer: optimizer stepping `model`'s parameters.
        criterion: loss over flattened vocab logits vs. flattened target ids.
        training_data_loader: batches exposing `.src` and `.trg`.
        validation_data_loader: forwarded to `_valid` for validation losses.
        opt: hyper-parameter namespace (gpuid, vocab_size, report_every, ...).
    """
    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        model = model.cuda()
        model = nn.DataParallel(model, device_ids=opt.gpuid)
        criterion.cuda()
    else:
        logging.info('Running on CPU!')

    logging.info('====================== Start Training =========================')
    train_history_losses = []
    valid_history_losses = []
    best_loss = sys.float_info.max
    stop_increasing = 0  # BUGFIX: was used below without ever being initialized
    train_losses = []
    total_batch = 0
    early_stop_flag = False

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break
        progbar = Progbar(title='Training', target=len(training_data_loader),
                          batch_size=opt.batch_size,
                          total_examples=len(training_data_loader.dataset))
        model.train()

        for batch_i, batch in enumerate(training_data_loader):
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            src = batch.src
            trg = batch.trg
            # print("src size - ",src.size())
            # print("target size - ",trg.size())
            if torch.cuda.is_available():
                # BUGFIX: .cuda() is not in-place; the old code discarded the
                # returned copies, so training silently stayed on CPU tensors.
                src = src.cuda()
                trg = trg.cuda()

            optimizer.zero_grad()
            decoder_logits, _, _ = model.forward(src, trg, must_teacher_forcing=False)

            start_time = time.time()
            # remove the 1st word in trg to let predictions and real goal match
            loss = criterion(
                decoder_logits.contiguous().view(-1, opt.vocab_size),
                trg[:, 1:].contiguous().view(-1)
            )
            print("--loss calculation- %s seconds ---" % (time.time() - start_time))

            start_time = time.time()
            loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                pre_norm = torch.nn.utils.clip_grad_norm(model.parameters(), opt.max_grad_norm)
                after_norm = (sum([p.grad.data.norm(2) ** 2 for p in model.parameters()
                                   if p.grad is not None])) ** (1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))
            optimizer.step()

            train_losses.append(loss.data[0])
            perplexity = np.math.exp(loss.data[0])
            progbar.update(epoch, batch_i, [('train_loss', loss.data[0]),
                                            ('perplexity', perplexity)])

            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info('====================== %d =========================' % (batch_i))
                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f, PPL=%.5f'
                             % (epoch, batch_i, np.mean(loss.data[0]), perplexity))
                sampled_size = 2
                logging.info('Printing predictions on %d sampled examples by greedy search' % sampled_size)
                # pull everything back to numpy for readable logging
                if torch.cuda.is_available():
                    src = src.data.cpu().numpy()
                    decoder_logits = decoder_logits.data.cpu().numpy()
                    max_words_pred = decoder_logits.argmax(axis=-1)
                    trg = trg.data.cpu().numpy()
                else:
                    src = src.data.numpy()
                    decoder_logits = decoder_logits.data.numpy()
                    max_words_pred = decoder_logits.argmax(axis=-1)
                    trg = trg.data.numpy()
                sampled_trg_idx = np.random.random_integers(low=0, high=len(trg) - 1, size=sampled_size)
                src = src[sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_logits = decoder_logits[sampled_trg_idx]
                # the real target has removed the starting <BOS>
                trg = [trg[i][1:] for i in sampled_trg_idx]
                for i, (src_wi, pred_wi, real_wi) in enumerate(zip(src, max_words_pred, trg)):
                    nll_prob = -np.sum(np.log2([decoder_logits[i][l][pred_wi[l]]
                                                for l in range(len(real_wi))]))
                    sentence_source = [opt.id2word[x] for x in src_wi]
                    sentence_pred = [opt.id2word[x] for x in pred_wi]
                    sentence_real = [opt.id2word[x] for x in real_wi]
                    logging.info('==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' % (' '.join(sentence_pred), nll_prob))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info('Run validation test @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                valid_losses = _valid(validation_data_loader, model, criterion, optimizer,
                                      epoch, opt, is_train=False)
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_losses)
                train_losses = []
                # Plot the learning curve
                plot_learning_curve(train_history_losses, valid_history_losses,
                                    'Training and Validation',
                                    curve1_name='Training Error',
                                    curve2_name='Validation Error',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_curve.png'
                                              % (epoch, batch_i, total_batch))

                '''
                determine if early stop training
                '''
                valid_loss = np.average(valid_history_losses[-1])
                is_best_loss = valid_loss < best_loss
                rate_of_change = float(valid_loss - best_loss) / float(best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and epoch >= opt.start_checkpoint_at and \
                        (total_batch % opt.save_model_every == 0 or is_best_loss):
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s'
                                 % os.path.join(opt.save_path,
                                                '%s.epoch=%d.batch=%d.total_batch=%d.error=%f'
                                                % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    torch.save(
                        model.state_dict(),
                        open(os.path.join(opt.save_path,
                                          '%s.epoch=%d.batch=%d.total_batch=%d'
                                          % (opt.exp, epoch, batch_i, total_batch) + '.model'), 'wb')
                    )

                # valid error doesn't decrease
                if rate_of_change >= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = min(valid_loss, best_loss)
                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('Have not increased for %d epoches, early stop training' % stop_increasing)
                    early_stop_flag = True
                    break
        logging.info('*' * 50)
def train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    """Train the copy-aware seq2seq model with maximum likelihood, evaluating
    valid/test sets by beam search, plotting learning curves, checkpointing
    model + training state, and early-stopping on the first reported score.

    Args:
        model: seq2seq model exposing `forward(src, trg, src_ext, oov_lists)`.
        optimizer: optimizer stepping `model`'s parameters.
        criterion: loss over flattened log-probs vs. flattened target ids.
        train_data_loader: yields (one2many_batch, one2one_batch) pairs.
        valid_data_loader / test_data_loader: evaluated via beam search.
        opt: hyper-parameter namespace (copy_model, vocab_size, train_from,
            run_valid_every, save_model_every, early_stop_tolerance, ...).
    """
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        # model = nn.DataParallel(model, device_ids=opt.gpuid)
    else:
        logging.info('Running on CPU!')

    logging.info('====================== Start Training =========================')
    checkpoint_names = []
    train_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    # best_loss = sys.float_info.max # for normal training/testing loss (likelihood)
    best_loss = 0.0  # for f-score (higher is better, hence the > comparisons below)
    stop_increasing = 0
    train_losses = []
    total_batch = 0
    early_stop_flag = False

    # optionally resume: the .state file mirrors the tuple saved below
    if opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
             train_history_losses, valid_history_losses,
             test_history_losses) = torch.load(open(state_path, 'rb'))
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training', target=len(train_data_loader),
                          batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            # widest OOV list in this batch sizes the extended vocabulary
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ", src.size())
            print("target size - ", trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()

            '''
            Training with Maximum Likelihood (word-level error)
            '''
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext, oov_lists)

            # simply average losses of all the predicitons
            # IMPORTANT, must use logits instead of probs to compute the loss, otherwise it's super super slow at the beginning (grads of probs are small)!
            start_time = time.time()

            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1)
                )
            else:
                # copy model scores base vocab + this batch's OOV slots
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1)
                )

            '''
            Training with Reinforcement Learning (instance-level reward f-score)
            '''
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch
            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            # NOTE(review): rl_loss is computed but never used (only ml_loss is
            # backpropagated), and get_loss_rl() is called with no arguments —
            # confirm whether the RL branch is unfinished or intentionally off.
            rl_loss = get_loss_rl()

            # NOTE(review): start_time is reassigned here without the first
            # timing ever being reported.
            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                # clip_grad_norm is the pre-1.0 (non-underscore) API
                pre_norm = torch.nn.utils.clip_grad_norm(model.parameters(), opt.max_grad_norm)
                after_norm = (sum([p.grad.data.norm(2) ** 2 for p in model.parameters()
                                   if p.grad is not None])) ** (1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])  # pre-0.4 PyTorch scalar access

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]),
                                            ('PPL', ml_loss.data[0])])

            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info('====================== %d =========================' % (batch_i))
                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' % (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info('Printing predictions on %d sampled examples by greedy search' % sampled_size)

                # pull tensors back to numpy for readable logging
                if torch.cuda.is_available():
                    src = src.data.cpu().numpy()
                    decoder_log_probs = decoder_log_probs.data.cpu().numpy()
                    max_words_pred = decoder_log_probs.argmax(axis=-1)
                    trg_target = trg_target.data.cpu().numpy()
                    trg_copy_target = trg_copy_target.data.cpu().numpy()
                else:
                    src = src.data.numpy()
                    decoder_log_probs = decoder_log_probs.data.numpy()
                    max_words_pred = decoder_log_probs.argmax(axis=-1)
                    trg_target = trg_target.data.numpy()
                    trg_copy_target = trg_copy_target.data.numpy()

                sampled_trg_idx = np.random.random_integers(low=0, high=len(trg) - 1, size=sampled_size)
                src = src[sampled_trg_idx]
                oov_lists = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    trg_target = [trg_target[i] for i in sampled_trg_idx]  # use the real target trg_loss (the starting <BOS> has been removed and contains oov ground-truth)
                else:
                    trg_target = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([decoder_log_probs[i][l][pred_wi[l]] for l in range(len(trg_i))])
                    find_copy = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy = np.any([x >= opt.vocab_size for x in trg_i])

                    # ids >= vocab_size index into this example's OOV list
                    sentence_source = [opt.id2word[x] if x < opt.vocab_size else oov_i[x - opt.vocab_size] for x in src_wi]
                    sentence_pred = [opt.id2word[x] if x < opt.vocab_size else oov_i[x - opt.vocab_size] for x in pred_wi]
                    sentence_real = [opt.id2word[x] if x < opt.vocab_size else oov_i[x - opt.vocab_size] for x in trg_i]

                    # truncate at the first <pad>
                    sentence_source = sentence_source[:sentence_source.index('<pad>')] if '<pad>' in sentence_source else sentence_source
                    sentence_pred = sentence_pred[:sentence_pred.index('<pad>')] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real = sentence_real[:sentence_real.index('<pad>')] if '<pad>' in sentence_real else sentence_real

                    logging.info('==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' % (' '.join(sentence_pred), nll_prob) + (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)) + (' [HAS COPY]' + str(trg_i) if has_copy else ''))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info('Run validing and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                # valid_losses = _valid_error(valid_data_loader, model, criterion, epoch, opt)
                # valid_history_losses.append(valid_losses)
                valid_score_dict = evaluate_beam_search(generator, valid_data_loader, opt, title='valid', epoch=epoch,
                                                        save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(generator, test_data_loader, opt, title='test', epoch=epoch,
                                                       save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                # assemble curves: training error + one curve per reported score
                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-' + name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-' + name for name in opt.report_score_names]
                scores = [np.asarray(s) for s in scores]

                # Plot the learning curve
                plot_learning_curve(scores=scores, curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                '''
                determine if early stop training (whether f-score increased, before is if valid error decreased)
                '''
                # "loss" here is actually the first reported score (higher = better)
                valid_loss = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # valid error doesn't increase
                if rate_of_change <= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss):  # epoch >= opt.start_checkpoint_at and
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    torch.save(
                        model.state_dict(),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.model'), 'wb')
                    )
                    # training state tuple; must match the resume unpacking above
                    torch.save(
                        (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_history_losses, valid_history_losses, test_history_losses),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.state'), 'wb')
                    )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('Have not increased for %d epoches, early stop training' % stop_increasing)
                    early_stop_flag = True
                    break
        logging.info('*' * 50)
def train(self, X_train, X_mask, Y_train, Y_mask, input, output, verbose, optimizer): train_set_x = theano.shared(np.asarray(X_train, dtype="int32"), borrow=True) train_set_y = theano.shared(np.asarray(Y_train, dtype="int32"), borrow=True) mask_set_x = theano.shared(np.asarray(X_mask, dtype="float32"), borrow=True) mask_set_y = theano.shared(np.asarray(Y_mask, dtype="float32"), borrow=True) index = T.lscalar("index") # index to a case lr = T.scalar("lr", dtype=theano.config.floatX) mom = T.scalar("mom", dtype=theano.config.floatX) # momentum n_ex = T.lscalar("n_ex") sindex = T.lscalar("sindex") # index to a case ### batch batch_start = index * self.n_batch batch_stop = T.minimum(n_ex, (index + 1) * self.n_batch) effective_batch_size = batch_stop - batch_start get_batch_size = theano.function(inputs=[index, n_ex], outputs=effective_batch_size) cost = self.loss(self.y, self.y_mask) + self.L1_reg * self.L1 updates = eval(optimizer)(self.params, cost, mom, lr) """ compute_val_error = theano.function(inputs = [index,n_ex ], outputs = self.loss(self.y,self.y_mask), givens = { self.x: train_set_x[:,batch_start:batch_stop], self.y: train_set_y[:,batch_start:batch_stop], self.x_mask: mask_set_x[:,batch_start:batch_stop], self.y_mask: mask_set_y[:,batch_start:batch_stop] }, mode = mode) """ train_model = theano.function( inputs=[index, lr, mom, n_ex], outputs=[cost, self.loss(self.y, self.y_mask)], updates=updates, givens={ self.x: train_set_x[:, batch_start:batch_stop], self.y: train_set_y[:, batch_start:batch_stop], self.x_mask: mask_set_x[:, batch_start:batch_stop], self.y_mask: mask_set_y[:, batch_start:batch_stop], }, mode=mode, on_unused_input="ignore", ) ############### # TRAIN MODEL # ############### print "Training model ..." 
epoch = 0 n_train = train_set_x.get_value(borrow=True).shape[1] n_train_batches = int(np.ceil(1.0 * n_train / self.n_batch)) if optimizer is not "SGD": self.learning_rate_decay = 1 while epoch < self.n_epochs: epoch = epoch + 1 if verbose == 1: progbar = Progbar(n_train_batches) train_losses = [] train_batch_sizes = [] for idx in xrange(n_train_batches): effective_momentum = ( self.final_momentum if (epoch + len(self.errors)) > self.momentum_switchover else self.initial_momentum ) cost = train_model(idx, self.lr, effective_momentum, n_train) train_losses.append(cost[1]) train_batch_sizes.append(get_batch_size(idx, n_train)) if verbose == 1: progbar.update(idx + 1) this_train_loss = np.average(train_losses, weights=train_batch_sizes) self.errors.append(this_train_loss) print ("epoch %i, train loss %f " "lr: %f" % (epoch, this_train_loss, self.lr)) ### autimatically saving snapshot .. if np.mod(epoch, self.snapshot) == 0: if epoch is not n_train_batches: self.save() ### generating sample.. if np.mod(epoch, self.sample_Freq) == 0: print "Generating a sample..." i = np.random.randint(1, n_train) test = X_train[:, i] truth = Y_train[:, i] guess = self.gen_sample(test, X_mask[:, i]) print "Input: ", " ".join(input.sequences_to_text(test)) print "Truth: ", " ".join(output.sequences_to_text(truth)) print "Sample: ", " ".join(output.sequences_to_text(guess[1])) """ # compute loss on validation set if np.mod(epoch,self.val_Freq)==0: val_losses = [compute_val_error(i, n_train) for i in xrange(n_train_batches)] val_batch_sizes = [get_batch_size(i, n_train) for i in xrange(n_train_batches)] this_val_loss = np.average(val_losses, weights=val_batch_sizes) """ self.lr *= self.learning_rate_decay
# create function for training and making prediction f_train = theano.function(inputs=[X, y_true], outputs=cost, updates=update, allow_input_downcast=True) f_predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True) NB_EPOCH = 2 BATCH_SIZE = 128 LEARNING_RATE = 0.1 learning_rate.set_value(np.cast['float32'](LEARNING_RATE)) training_history = [] valid_history = [] for epoch in range(NB_EPOCH): prog = Progbar(target=X_train.shape[0]) n = 0 history = [] while n < X_train.shape[0]: start = n end = min(n + BATCH_SIZE, X_train.shape[0]) c = f_train(X_train[start:end], y_train[start:end]) prog.title = 'Epoch: %.2d, Cost: %.4f' % (epoch + 1, c) prog.add(end - start) n += BATCH_SIZE history.append(c) # end of epoch, start validating y = np.argmax(f_predict(X_valid), axis=-1) accuracy = accuracy_score(y_valid, y) print('Validation accuracy:', accuracy) # save history
def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, save_path=None):
    """Run beam-search decoding over a dataset and score predicted keyphrases.

    For each example: decode with beam search, filter predictions (validity,
    presence in the source text, one-word handling), match them against the
    ground-truth phrases, and accumulate precision/recall/f-score at several
    top-k cutoffs. Optionally writes per-example dumps and a summary CSV.

    Args:
        generator: beam-search wrapper exposing
            beam_search(src, src_len, oov_map, oov_list, word2id).
        data_loader: yields (one2many_batch, one2one_batch); only the
            one2many view is consumed here.
        opt: config namespace — uses word2id, id2word, vocab_size,
            must_appear_in_src (and whatever process_predseqs reads).
        title: tag used for the log file and output file names.
        epoch: epoch number forwarded to the progress bar.
        save_path: if truthy, per-example details and '<title>_result.csv'
            are written under it. NOTE(review): the log path
            `save_path + '/%s.log'` is built unconditionally, so
            save_path=None would raise — presumably always set by callers.

    Returns:
        dict mapping '<metric>@<topk>#oneword=<n>' -> list of per-example scores.
    """
    # NOTE: local name `logging` shadows the stdlib module inside this function.
    logging = config.init_logging(title, save_path + '/%s.log' % title)
    progbar = Progbar(logger=logging, title=title,
                      target=len(data_loader.dataset.examples),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset.examples))
    example_idx = 0
    # Keys look like 'f_score@5#oneword=-1'; each value is a per-example score list.
    score_dict = {}

    for i, batch in enumerate(data_loader):
        one2many_batch, one2one_batch = batch
        src_list, src_len, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

        if torch.cuda.is_available():
            src_list = src_list.cuda()
            src_oov_map_list = src_oov_map_list.cuda()

        print("batch size - %s" % str(src_list.size(0)))

        pred_seq_list = generator.beam_search(src_list, src_len, src_oov_map_list, oov_list, opt.word2id)

        '''
        process each example in current batch
        '''
        for src, src_str, trg, trg_str_seqs, trg_copy, pred_seq, oov in zip(src_list, src_str_list, trg_list, trg_str_list, trg_copy_target_list, pred_seq_list, oov_list):
            print_out = ''
            print_out += '[Source][%d]: %s \n' % (len(src_str), ' '.join(src_str))

            # Flags for which ground-truth phrases literally appear in the source.
            trg_str_is_present = if_present_duplicate_phrase(src_str, trg_str_seqs)
            print_out += '[GROUND-TRUTH] #(present)/#(all targets)=%d/%d\n' % (sum(trg_str_is_present), len(trg_str_is_present))
            # Present phrases are rendered in [brackets] in the dump.
            print_out += '\n'.join(['\t\t[%s]' % ' '.join(phrase) if is_present else '\t\t%s' % ' '.join(phrase) for phrase, is_present in zip(trg_str_seqs, trg_str_is_present)])
            print_out += '\noov_list: \n\t\t%s \n' % str(oov)

            # 1st filtering: validity of raw beam outputs.
            pred_is_valid, processed_pred_seqs, processed_pred_str_seqs, processed_pred_score = process_predseqs(pred_seq, oov, opt.id2word, opt)

            # 2nd filtering: if filter out phrases that don't appear in text, and keep unique ones after stemming
            if opt.must_appear_in_src:
                pred_is_present = if_present_duplicate_phrase(src_str, processed_pred_str_seqs)
                # NOTE(review): the ground-truth set is also narrowed to the
                # present-in-source subset here, so later matching only scores
                # extractable phrases — confirm this is intended.
                trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present]
            else:
                pred_is_present = [True] * len(processed_pred_str_seqs)

            # Elementwise AND of the two filters, as a numpy boolean mask.
            valid_and_present = np.asarray(pred_is_valid) * np.asarray(pred_is_present)
            match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=processed_pred_str_seqs)
            print_out += '[PREDICTION] #(valid)=%d, #(present)=%d, #(retained&present)=%d, #(all)=%d\n' % (sum(pred_is_valid), sum(pred_is_present), sum(valid_and_present), len(pred_seq))
            print_out += ''

            '''
            Print and export predictions
            '''
            preds_out = ''
            for p_id, (seq, word, score, match, is_valid, is_present) in enumerate(
                    zip(processed_pred_seqs, processed_pred_str_seqs, processed_pred_score, match_list, pred_is_valid, pred_is_present)):
                preds_out += '%s\n' % (' '.join(word))
                # Dump annotations: [phrase] = present in source, *phrase = valid,
                # plus correctness and copy-mechanism flags.
                if is_present:
                    print_phrase = '[%s]' % ' '.join(word)
                else:
                    print_phrase = ' '.join(word)
                if is_valid:
                    print_phrase = '*%s' % print_phrase
                if match == 1.0:
                    correct_str = '[correct!]'
                else:
                    correct_str = ''
                # Token ids >= vocab_size were copied from the source via OOV slots.
                if any([t >= opt.vocab_size for t in seq.sentence]):
                    copy_str = '[copied!]'
                else:
                    copy_str = ''
                # NOTE(review): print_phrase/correct_str/copy_str fed a dump line
                # that is commented out upstream; the loop currently only builds
                # preds_out.

            '''
            Evaluate predictions w.r.t different filterings and metrics
            '''
            num_oneword_range = [-1, 1]
            topk_range = [5, 10]
            score_names = ['precision', 'recall', 'f_score']

            # Keep only predictions that passed both filters (rebinds the locals).
            processed_pred_seqs = np.asarray(processed_pred_seqs)[valid_and_present]
            processed_pred_str_seqs = np.asarray(processed_pred_str_seqs)[valid_and_present]
            processed_pred_score = np.asarray(processed_pred_score)[valid_and_present]

            for num_oneword_seq in num_oneword_range:
                # 3rd round filtering (one-word phrases)
                filtered_pred_seq, filtered_pred_str_seqs, filtered_pred_score = post_process_predseqs((processed_pred_seqs, processed_pred_str_seqs, processed_pred_score), num_oneword_seq)
                match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=filtered_pred_str_seqs)
                assert len(filtered_pred_seq) == len(filtered_pred_str_seqs) == len(filtered_pred_score) == len(match_list)
                for topk in topk_range:
                    results = evaluate(match_list, filtered_pred_seq, trg_str_seqs, topk=topk)
                    for k, v in zip(score_names, results):
                        # Lazily create the per-metric list on first use.
                        if '%s@%d#oneword=%d' % (k, topk, num_oneword_seq) not in score_dict:
                            score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)] = []
                        score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)].append(v)
                        print_out += '\t%s@%d#oneword=%d = %f\n' % (k, topk, num_oneword_seq, v)

            if save_path:
                # Per-example human-readable dump and raw prediction list.
                if not os.path.exists(os.path.join(save_path, title + '_detail')):
                    os.makedirs(os.path.join(save_path, title + '_detail'))
                with open(os.path.join(save_path, title + '_detail', str(example_idx) + '_print.txt'), 'w') as f_:
                    f_.write(print_out)
                with open(os.path.join(save_path, title + '_detail', str(example_idx) + '_prediction.txt'), 'w') as f_:
                    f_.write(preds_out)

            progbar.update(epoch, example_idx, [('f_score@5#oneword=-1', np.average(score_dict['f_score@5#oneword=-1'])), ('f_score@10#oneword=-1', np.average(score_dict['f_score@10#oneword=-1']))])
            example_idx += 1

    print('#(f_score@5#oneword=-1)=%d, sum=%f' % (len(score_dict['f_score@5#oneword=-1']), sum(score_dict['f_score@5#oneword=-1'])))
    print('#(f_score@10#oneword=-1)=%d, sum=%f' % (len(score_dict['f_score@10#oneword=-1']), sum(score_dict['f_score@10#oneword=-1'])))
    print('#(f_score@5#oneword=1)=%d, sum=%f' % (len(score_dict['f_score@5#oneword=1']), sum(score_dict['f_score@5#oneword=1'])))
    print('#(f_score@10#oneword=1)=%d, sum=%f' % (len(score_dict['f_score@10#oneword=1']), sum(score_dict['f_score@10#oneword=1'])))

    if save_path:
        # export scores. Each row is scores (precision, recall and f-score) of different way of filtering predictions (how many one-word predictions to keep)
        # NOTE(review): num_oneword_range/topk_range/score_names leak out of the
        # per-example loop above — this block relies on at least one example
        # having been processed, otherwise it raises NameError.
        with open(save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
            csv_lines = []
            for num_oneword_seq in num_oneword_range:
                for topk in topk_range:
                    csv_line = '#oneword=%d,@%d' % (num_oneword_seq, topk)
                    for k in score_names:
                        csv_line += ',%f' % np.average(score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)])
                    csv_lines.append(csv_line + '\n')
            result_csv.writelines(csv_lines)

    return score_dict