class Train():
    def __init__(self, difficulty):
        self.data_path = "../data"
        self.model_path = "../models"
        self.output_path = "../outputs"
        self.difficulty = difficulty
        self.timestamp = str(int(time.time()))
        self.model_name = "gru_" + self.difficulty

        self.data = Data(difficulty=self.difficulty, data_path=self.data_path)
        (self.img_features, self.w2i, self.i2w, self.nwords,
         self.UNK, self.PAD) = self.data()

        self.train = list(self.data.get_train_data())
        self.dev = list(self.data.get_validation_data())
        self.test = list(self.data.get_test_data())

        self.image_feature_size = 2048
        self.output_vector_size = 10

    def __call__(self, number_of_iterations=2, learning_rate=0.005,
                 embedding_size=300, hidden_size=100, batch_size=100):
        print("Starting 'Image Retrieval' in 'GRU' mode with '" + self.difficulty + "' data")

        self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp \
            + "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty"
        self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp \
            + "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv"

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.model = GRU(self.nwords, self.embedding_size, self.image_feature_size,
                         self.output_vector_size, self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()
        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch,
                                 self.preprocess, self.image_feature_size,
                                 self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.train_loss_values = []

        self.magic()
        self.save_model()
        self.save_data()

    def minibatch(self, data, batch_size=50):
        for i in range(0, len(data), batch_size):
            yield data[i:i + batch_size]

    def preprocess(self, batch):
        """Helper function for functional batches"""
        correct_indexes = [observation[2] for observation in batch]
        img_ids = [observation[1] for observation in batch]
        text_features = [observation[0] for observation in batch]
        last_words = [len(dialog) for dialog in text_features]

        # Add padding up to the max sentence length in the batch
        max_length = max(map(len, text_features))
        text_features = [txt + [self.PAD] * (max_length - len(txt)) for txt in text_features]

        # Return in "stacked" format; last_words is used to exclude padding effects on the GRU
        return text_features, img_ids, correct_indexes, last_words

    def magic(self):
        for ITER in range(self.number_of_iterations):
            random.shuffle(self.train)
            train_loss = 0.0
            start = time.time()
            iteration = 0

            for batch in self.minibatch(self.train, self.batch_size):
                self.model.zero_grad()
                self.optimizer.zero_grad()
                self.model.hidden = self.model.init_hidden()

                # Load data for model
                text_features, h5_ids, correct_index, last_words = self.preprocess(batch)
                lookup_text_tensor = Variable(torch.LongTensor([text_features])).squeeze()

                full_img_batch = np.empty([len(batch), self.output_vector_size,
                                           self.image_feature_size])
                for obs, img_ids in enumerate(h5_ids):
                    for index, h5_id in enumerate(img_ids):
                        full_img_batch[obs, index] = self.img_features[h5_id]
                full_img_batch = Variable(torch.from_numpy(full_img_batch).type(torch.FloatTensor))

                # Target
                target = Variable(torch.LongTensor([correct_index])).squeeze()

                # Vector for excluding padding effects
                last_words = Variable(torch.LongTensor(last_words))

                # Run model and calculate loss
                prediction = self.model(lookup_text_tensor, full_img_batch, last_words)
                loss = self.criterion(prediction, target)
                train_loss += loss.data[0]
                iteration += self.batch_size
                print(iteration)

                loss.backward()
                self.optimizer.step()

            print("ITERATION %r: train loss/sent=%.4f, time=%.2fs"
                  % (ITER + 1, train_loss / len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss / len(self.train))

    def save_model(self):
        # Save model
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score", self.evaluate(self.test, self.batch_size))

    def plot(self):
        plt.plot(self.train_loss_values, label="Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name + " - loss with lr = %.4f, embedding size = %r"
                  % (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        file = open(self.output_file_name, "w")
        file.write(", ".join(map(str, self.train_loss_values)))
        file.write("\n")
        file.write(str(self.evaluate(self.test, self.batch_size)))
        file.write("\n")
        file.close()
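
# The `GRU` model that Train instantiates is defined elsewhere in the repository.
# Purely as an illustration of the calling convention above (tokens + a
# [batch, 10, 2048] block of candidate image features + true dialog lengths,
# producing logits for nn.CrossEntropyLoss), the sketch below shows one way such
# a module could be wired up. The class name, layer choices, and dot-product
# scoring are assumptions, not the original implementation.
import torch
import torch.nn as nn


class GRUSketch(nn.Module):
    def __init__(self, nwords, embedding_size, image_feature_size,
                 output_vector_size, hidden_size, batch_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.embed = nn.Embedding(nwords, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size, batch_first=True)
        self.project = nn.Linear(hidden_size, image_feature_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Fresh (num_layers, batch, hidden_size) state; Train reassigns this before each batch.
        return torch.zeros(1, self.batch_size, self.hidden_size)

    def forward(self, text, img_features, last_words):
        # text: [batch, seq_len] token ids; img_features: [batch, 10, 2048];
        # last_words: [batch] unpadded dialog lengths.
        embedded = self.embed(text)
        outputs, _ = self.gru(embedded, self.hidden)
        # Take the GRU output at each dialog's last real token, ignoring padding.
        idx = (last_words - 1).view(-1, 1, 1).expand(-1, 1, outputs.size(2))
        final = outputs.gather(1, idx).squeeze(1)          # [batch, hidden_size]
        query = self.project(final)                        # [batch, image_feature_size]
        # One dot-product score per candidate image -> logits for CrossEntropyLoss.
        return torch.bmm(img_features, query.unsqueeze(2)).squeeze(2)
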
batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# interval is the sample interval between the last input and the first output.
interval = 0
epoch = 100
device = 'cuda'

# Generate sin dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch,
                                         batch_size, optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device,
                               normalizer, batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
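
# `generate_data`, `train` and `predict` are project helpers not shown here.
# As a hedged sketch only (the real helper may differ), `generate_data` could
# roughly do the following: min-max normalize the series, cut it into sliding
# windows of `input_length` inputs followed, after `interval` skipped samples,
# by `output_length` targets, and hold out the last `testnum` windows for testing.
import numpy as np


def generate_data_sketch(series, scaling, input_length, output_length, testnum, interval):
    assert scaling == 'minmax'
    lo, hi = series.min(), series.max()
    normalizer = (lo, hi)                  # kept so predictions can be un-scaled later
    scaled = (series - lo) / (hi - lo)

    xs, ys = [], []
    last_start = len(scaled) - input_length - interval - output_length
    for start in range(last_start):
        xs.append(scaled[start:start + input_length])
        ys.append(scaled[start + input_length + interval:
                         start + input_length + interval + output_length])
    xs = np.array(xs, dtype=np.float32)
    ys = np.array(ys, dtype=np.float32)

    # Last `testnum` windows form the test split.
    return xs[:-testnum], ys[:-testnum], xs[-testnum:], ys[-testnum:], normalizer
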
class Trainer(object):
    def __init__(self, config, h_loader, r_loader, test_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.test_loader = test_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # in case
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf

        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = C2D().cuda()
        self.gru = GRU(self.c2d).cuda()

    def train(self):
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad, self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()
        max_acc = 0.

        for epoch in range(self.n_epochs):
            self.gru.train()
            epoch_loss = []

            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = h[0]
                r_video = r[0]

                h_video = Variable(h_video).cuda()
                r_video = Variable(r_video).cuda()

                # Highlight snippets should be scored as 1.
                self.gru.zero_grad()
                predicted = self.gru(h_video)
                target = torch.ones(len(predicted), dtype=torch.float32).cuda()
                h_loss = criterion(predicted, target)  # compute loss
                h_loss.backward()
                opt.step()

                # Raw (non-highlight) snippets should be scored as 0.
                self.gru.zero_grad()
                predicted = self.gru(r_video)  # predicted snippet's score
                target = torch.zeros(len(predicted), dtype=torch.float32).cuda()
                r_loss = criterion(predicted, target)  # compute loss
                r_loss.backward()
                opt.step()

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append((total_loss.data).cpu().numpy())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                      % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                         step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot('H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              (h_loss.data).cpu().numpy())
                self.vis.plot('R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            # Periodically evaluate on the test set and checkpoint the best model.
            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.test_loader)
                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(self.gru.state_dict(),
                               './samples/lr_%.4f_chkpoint' % cfig.lr + str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0][0] + ".npy", f[1])
                    print(np.load("./samples/testRV04(198,360).mp4.npy"))
                    print("checkpoint saved")

    def test(self, t_loader):
        # Test accuracy
        self.gru.eval()

        test_avg_acc = 0.
        test_cnt = 0
        savelist = []

        for idx, (video, label, filename) in enumerate(self.test_loader):
            video = Variable(video).cuda()

            predicted = self.gru(video)  # [num_frames, 1]
            predicted = predicted.view(1, -1)
            predicted = predicted.cpu().detach().numpy()
            predicted = predicted[0]
            label = label.cpu().numpy()

            # Scores the model assigns to ground-truth highlight frames.
            gt_label_predicted_score = predicted * label
            gt_label_predicted_score = list(gt_label_predicted_score)
            for sc in gt_label_predicted_score[0]:
                if sc != 0.:
                    print("%.3f" % sc, end=' ')

            # Threshold frame scores into binary highlight predictions.
            for i in range(len(predicted)):
                if predicted[i] >= 0.45:
                    predicted[i] = 1.
                else:
                    predicted[i] = 0.

            acc = (predicted == label).sum().item() / float(len(predicted))
            print("filename: %s accuracy: %.4f" % (filename, acc))
            test_avg_acc += acc
            test_cnt += 1
            savelist.append([filename, predicted])

        print()
        test_avg_acc = test_avg_acc / test_cnt
        print("Accuracy:", round(test_avg_acc, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, test_avg_acc)

        return test_avg_acc, savelist
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='From PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--epochs', type=int, default=10000, metavar='E',
                        help='number of epochs to train')
    parser.add_argument('--warmup-epochs', type=int, default=5000, metavar='WE',
                        help='number of epochs to warmup')
    parser.add_argument('--num-steps', type=int, default=100, metavar='N',
                        help='number of batches in one epoch')
    parser.add_argument('--num-points', type=int, default=10, metavar='NS',
                        help='number of query points')
    parser.add_argument('--num-hidden', type=int, default=128, metavar='NE',
                        help='number of hidden units')
    parser.add_argument('--lr', type=float, default=0.0003, metavar='LR',
                        help='learning rate')
    parser.add_argument('--alpha', type=float, default=0, metavar='A',
                        help='kl factor')
    parser.add_argument('--sampling', action='store_true', default=False,
                        help='uses sampling')
    parser.add_argument('--direction', action='store_true', default=False,
                        help='uses directed data-sets')
    parser.add_argument('--ranking', action='store_true', default=False,
                        help='sort data-set according to importance')
    parser.add_argument('--num-runs', type=int, default=1, metavar='NR',
                        help='number of runs')
    parser.add_argument('--save-path', default='trained_models/random_',
                        help='directory to save results')
    parser.add_argument('--load-path', default='trained_models/default_model_0.pth',
                        help='path to load model')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    for i in range(args.num_runs):
        writer = SummaryWriter()
        performance = torch.zeros(args.epochs)
        accuracy_test = 0

        data_loader = PairedComparison(4, direction=args.direction, ranking=args.ranking)
        model = GRU(data_loader.num_inputs, data_loader.num_targets, args.num_hidden).to(device)

        if args.alpha > 0:
            print('Loading pretrained network...')
            params, _ = torch.load(args.load_path)
            model.load_state_dict(params)
            model.reset_log_sigma()
            max_alpha = args.alpha

        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)

        with trange(args.epochs) as t:
            for j in t:
                loss_train = 0
                for k in range(args.num_steps):
                    inputs, targets, _, _ = data_loader.get_batch(
                        args.batch_size, args.num_points, device=device)
                    predictive_distribution, _, _ = model(inputs, targets, args.sampling)
                    loss = -predictive_distribution.log_prob(targets).mean()
                    writer.add_scalar('NLL', loss.item(), j * args.num_steps + k)

                    if args.alpha > 0:
                        alpha = min(j / args.warmup_epochs, 1.0) * max_alpha
                        kld = model.regularization(alpha)
                        loss = loss + kld
                        writer.add_scalar('KLD', kld.item(), j * args.num_steps + k)

                    loss_train += loss

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 40.0)
                    optimizer.step()

                t.set_description('Loss (train): {:5.4f}'.format(loss_train.item() / args.num_steps))
                performance[j] = loss_train.item() / args.num_steps

        torch.save([model.state_dict(), performance], args.save_path + str(i) + '.pth')
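
# Each checkpoint written above is a two-element list [state_dict, performance];
# main() itself reloads one the same way when --alpha > 0. The helper below is a
# minimal, assumption-laden sketch of reloading such a run for evaluation; the
# function name, the default hidden size, and the argument-free PairedComparison
# call are placeholders rather than project API.
def load_run_sketch(path, num_hidden=128, device='cpu'):
    data_loader = PairedComparison(4)
    model = GRU(data_loader.num_inputs, data_loader.num_targets, num_hidden).to(device)
    params, performance = torch.load(path, map_location=device)
    model.load_state_dict(params)
    model.eval()
    return model, performance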