class Train(object):
    """Trains a character-level RNN language model on a text corpus.

    Loads an existing serialized model from ``model_file`` when present,
    otherwise builds a fresh ``RNN``. Checkpoints whenever the per-chunk
    loss improves and once more at the end of training.
    """

    def __init__(self, training_file='../res/trump_tweets.txt',
                 model_file='../res/model.pt', n_epochs=1000000,
                 hidden_size=256, n_layers=2, learning_rate=0.001,
                 chunk_len=140):
        self.training_file = training_file
        self.model_file = model_file
        self.n_epochs = n_epochs
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.learning_rate = learning_rate
        self.chunk_len = chunk_len
        self.file, self.file_len = read_file(training_file)
        if os.path.isfile(model_file):
            self.decoder = torch.load(model_file)
            print('Loaded old model!')
        else:
            self.decoder = RNN(n_characters, hidden_size, n_characters,
                               n_layers)
            print('Constructed new model!')
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(),
                                                  learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.generator = Generator(self.decoder)

    def train(self, inp, target):
        """Run one chunk through the network, step the optimizer, and
        return the mean per-character loss."""
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(self.chunk_len):
            # Call the module itself rather than .forward() so that any
            # registered hooks are honoured.
            output, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(output, target[c])
        loss.backward()
        self.decoder_optimizer.step()
        # .item() replaces the deprecated loss.data[0], which raises an
        # IndexError on 0-dim tensors in modern PyTorch.
        return loss.item() / self.chunk_len

    def save(self):
        """Serialize the whole decoder module to ``self.model_file``."""
        torch.save(self.decoder, self.model_file)
        print('Saved as %s' % self.model_file)

    def random_training_set(self, chunk_len):
        """Pick a random chunk of the corpus; return (input, target)
        character tensors where target is input shifted by one."""
        start_index = random.randint(0, self.file_len - chunk_len)
        end_index = start_index + chunk_len + 1
        chunk = self.file[start_index:end_index]
        inp = char_tensor(chunk[:-1])
        target = char_tensor(chunk[1:])
        return inp, target

    def start(self):
        """Main training loop: one random chunk per epoch, checkpoint on
        every loss improvement, sample generated text each epoch."""
        start_time = time.time()
        print("Training for %d epochs..." % self.n_epochs)
        best_loss = None
        for epoch in range(1, self.n_epochs + 1):
            loss = self.train(*self.random_training_set(self.chunk_len))
            # `is None` instead of truthiness: a loss of exactly 0.0 is a
            # valid best, not "no best yet".
            if best_loss is None or loss < best_loss:
                self.save()
                best_loss = loss
            print('[%s (%d %d%%) %.4f]' % (time_since(start_time), epoch,
                                           epoch / self.n_epochs * 100,
                                           loss))
            print(self.generator.generate(), '\n')
        print("Finished training, saving...")
        self.save()
def main(args):
    """Train the descriptor-attention RNN configured by ``args``.

    Checkpoints every ``args.save_epoch`` epochs, always writes a final
    ``model-epoch-last.pth`` (also on Ctrl-C), then plots the per-epoch
    mean loss.
    """
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.descriptors_path,
                                  args.json_labels_path, args.bs)

    model = RNN(num_descriptors=args.num_descriptors,
                hidden_size=args.hidden_size,
                lstm_in_size=args.input_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = Loss()

    losses = []
    try:
        for epoch in range(num_epochs):
            # Multiplicative LR decay every `decay_epoch` epochs (skip 0).
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate
            loss_epoch = []
            for step, (descriptors, labels) in enumerate(data_loader):
                if torch.cuda.is_available():
                    descriptors = descriptors.cuda()
                    labels = labels.cuda()
                model.zero_grad()
                attention = model(descriptors)
                loss = model_loss(attention, labels)
                loss.backward()
                optimizer.step()
                loss_epoch.append(loss.cpu().detach().numpy())
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' +
                      str(len(data_loader)) + ' - Loss: ' + str(float(loss)))
            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))
            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                # Bug fix: this subdirectory was never created (only
                # 'models' was), so the first periodic torch.save would
                # fail. Create it on demand.
                save_dir = os.path.join('models', 'models_361_dropout')
                os.makedirs(save_dir, exist_ok=True)
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                torch.save(model.state_dict(),
                           os.path.join(save_dir, filename))
    except KeyboardInterrupt:
        pass
    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
# Call train on the model: one full pass over train_sequences per epoch.
for epoch in range(1, num_epochs + 1):
    epochs.append(epoch)
    hidden = rnn.init_hidden()
    loss_total = 0
    acc = 0
    # Get training data for this cycle
    for i, sequence in enumerate(train_sequences):
        # Input is the sequence minus its last token; target is the
        # sequence shifted left by one (next-token prediction).
        input_variable = Variable(torch.LongTensor(sequence[:-1]))
        targets = sequence[1:]
        target_variable = Variable(torch.LongTensor(targets))
        # Detach the hidden state so gradients don't flow across sequences.
        hidden = repackage_hidden(hidden)
        rnn.zero_grad()
        output, hidden = rnn(input_variable, hidden)
        loss = criterion(output, target_variable.contiguous().view(-1))
        # Fraction of positions where argmax prediction matches the target.
        val = (target_variable.data.view(-1).eq(
            torch.max(output, 1)[1].data).sum())
        acc += (val / float(len(output.data)))
        loss.backward()
        # clip_grad_norm was deprecated/removed; the trailing-underscore
        # variant is the supported in-place form.
        torch.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
        optimizer.step()
        # Keep track of loss; .item() replaces the deprecated loss.data[0]
        # (IndexError on 0-dim tensors in modern PyTorch).
        loss_total += loss.item()
def main(batch_size, embed_size, num_hiddens, num_layers, ln_hidden,
         ln_output, rec_unit, learning_rate=1e-4, log_step=10,
         num_epochs=50, save_step=100, ngpu=1):
    """Train the CNN -> fcNet -> RNN relative-captioning pipeline.

    Runs validation and prints a sampled caption every ``log_step`` steps,
    checkpoints every ``save_step`` steps, and always performs a final save
    (also on Ctrl-C) in the ``finally`` block.
    """
    # hyperparameters
    num_workers = 0
    checkpoint_dir = 'checkpoint'

    # Image preprocessing: ImageNet mean/std normalization.
    transform = {
        'train': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ]),
        'val': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ]),
    }

    # load data
    vocab = build_vocab(path='relative_captions_shoes.json')
    train_data, train_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='train', vocab=vocab,
        transform=transform['train'], batch_size=batch_size)
    val_data, val_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='valid', vocab=vocab,
        transform=transform['val'], batch_size=batch_size)

    losses_val = []
    losses_train = []

    # Build the models
    initial_step = initial_epoch = 0
    encoder = CNN(embed_size)  # embed_size: power of 2
    middle = fcNet(embed_size, ln_hidden, ln_output)
    decoder = RNN(ln_output, num_hiddens, len(vocab), num_layers,
                  rec_unit=rec_unit, drop_out=0.1)

    # Loss & optimizer: only the encoder's linear + batchnorm layers are
    # fine-tuned; the rest of the encoder stays frozen.
    loss_fun = nn.CrossEntropyLoss()
    params = (list(decoder.parameters()) +
              list(encoder.linear.parameters()) +
              list(encoder.batchnorm.parameters()))
    optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Bug fix: pre-bind `step`/`epoch` so the final save in `finally`
    # cannot hit a NameError if training is interrupted before the first
    # batch completes.
    step = initial_step
    epoch = initial_epoch

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):
            print('Epoch: {}'.format(epoch))
            for step, (images, captions, lengths) in enumerate(
                    train_loader, start=initial_step):
                # Set mini-batch dataset
                images = Variable(images)
                captions = Variable(captions)
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                middle.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPUs
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    rnn_input = nn.parallel.data_parallel(
                        middle, features, range(ngpu))
                    # Bug fix: the decoder must consume (rnn_input,
                    # captions, lengths) — mirroring the single-GPU branch —
                    # not the raw encoder `features`.
                    outputs = nn.parallel.data_parallel(
                        decoder, (rnn_input, captions, lengths), range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    rnn_input = middle(features)
                    outputs = decoder(rnn_input, captions, lengths)

                train_loss = loss_fun(outputs, targets)
                losses_train.append(train_loss.item())
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions, lengths) in enumerate(
                            val_loader):
                        images = Variable(images)
                        captions = Variable(captions)
                        targets = pack_padded_sequence(
                            captions, lengths, batch_first=True)[0]
                        features = encoder(images)
                        rnn_input = middle(features)
                        outputs = decoder(rnn_input, captions, lengths)
                        val_loss = loss_fun(outputs, targets)
                        batch_loss_val.append(val_loss.item())
                    losses_val.append(np.mean(batch_loss_val))

                    # predict a sample caption from the last rnn_input
                    sampled_ids = decoder.sample(rnn_input)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1],
                                losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models. Consistency fix: use the utils.-qualified
                # helpers, matching the final save in `finally`.
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, middle, decoder, optimizer,
                                      step, epoch, losses_train, losses_val,
                                      checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))
    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, middle, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
class Model():
    """Char-RNN wrapper: training steps, temperature sampling, and
    (de)serialization of a GRU decoder."""

    def __init__(self, input_size, hidden_size, output_size, n_layers=1,
                 gpu=-1):
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers,
                           gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()
        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, inp, target, chunk_len=200):
        """One optimization step over a chunk; returns mean per-char loss."""
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(chunk_len):
            out, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(out, target[c])
        loss.backward()
        self.optimizer.step()
        # .item() replaces the deprecated loss.data[0], which raises an
        # IndexError on 0-dim tensors in modern PyTorch.
        return loss.item() / chunk_len

    def generate(self, prime_str, predict_len=100, temperature=0.8):
        """Sample ``predict_len`` characters from the decoder, seeded with
        ``prime_str``; lower ``temperature`` sharpens the distribution."""
        predicted = prime_str
        hidden = self.decoder.init_hidden()
        prime_input = char_tensor(prime_str, self.decoder.gpu)
        # Use prime string to build up hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = self.decoder(prime_input[p], hidden)
        inp = prime_input[-1]
        for p in range(predict_len):
            out, hidden = self.decoder(inp, hidden)
            # sample from network as a multinomial distribution
            # (bug fix: this line was duplicated — redundant recompute removed)
            out_dist = out.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(out_dist, 1)[0]
            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char, self.decoder.gpu)
        return predicted

    def save(self):
        """Checkpoint the whole decoder module under save/."""
        model_name = "char-rnn-gru.pt"
        if not os.path.exists("save"):
            os.mkdir("save")
        torch.save(self.decoder, "save/%s" % model_name)
        print("--------------> [Checkpoint] Save model into save/%s"
              % model_name)

    def load(self, model_path="save/char-rnn-gru.pt"):
        """Replace the current decoder with one loaded from ``model_path``."""
        self.decoder = torch.load(model_path)
def main(args):
    """Train the attention RNN with a two-part BCE loss and a scheduled
    sampling probability ``p`` annealed at epochs 3/7/15.

    Checkpoints every ``args.save_epoch`` epochs, always writes a final
    ``model-epoch-last.pth`` (also on Ctrl-C), then plots the loss curve.
    """
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path,
                                  args.json_labels_path, args.bs)

    model = RNN(lstm_hidden_size=args.hidden_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr,
                                  momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1  # scheduled-sampling probability, annealed below
    try:
        for epoch in range(num_epochs):
            # Multiplicative LR decay every `decay_epoch` epochs (skip 0).
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate
            # Anneal p: 1 -> 2/3 -> 1/3 -> 0 at epochs 3, 7, 15.
            if epoch in (3, 7, 15):
                if epoch == 3:
                    p = 2 / 3
                if epoch == 7:
                    p = 1 / 3
                if epoch == 15:
                    p = 0

            loss_epoch = []
            loss1_epoch = []
            loss2_epoch = []
            for step, (tensors, masks, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    tensors = tensors.cuda()
                    masks = masks.cuda()
                    gt = gt.cuda()
                model.zero_grad()
                out, att = model(tensors, masks, gt, p)
                loss1 = model_loss(out, gt)
                # att[:, :-1, :] -> attention produced (location in the next frame) until the last frame -1 (49)
                # gt[:, 1:, :] -> gt from the second frame until the last frame (49)
                loss2 = model_loss(att[:, :-1, :], gt[:, 1:, :])
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()
                loss_epoch.append(loss.cpu().detach().numpy())
                loss1_epoch.append(loss1.cpu().detach().numpy())
                loss2_epoch.append(loss2.cpu().detach().numpy())

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            # Bug fix: both component means previously averaged
            # `loss_epoch` (copy/paste), so the printed loss1/loss2
            # were just the total loss repeated.
            loss1_epoch_mean = np.mean(np.array(loss1_epoch))
            loss2_epoch_mean = np.mean(np.array(loss2_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean) +
                  " (loss1: " + str(loss1_epoch_mean) +
                  ", loss2: " + str(loss2_epoch_mean) + ")")

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass
    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
def main(args):
    """Train the feature-map RNN with a scheduled-sampling probability that
    steps down over epochs; checkpoints periodically under models/ and
    always writes model-epoch-last.pth (also on Ctrl-C)."""
    print(sys.argv)

    if not os.path.exists('models'):
        os.mkdir('models')

    total_epochs = args.ne
    decay_factor = args.decay
    current_lr = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path, args.bs,
                                  args.json_labels_path, num_workers=8)

    model = RNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # RMSprop (with momentum) or Adam, per the command-line flag.
    optimizer = (optim.RMSprop(model.parameters(), lr=args.lr,
                               momentum=args.mm)
                 if args.rms
                 else optim.Adam(model.parameters(), lr=args.lr))
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1
    try:
        for epoch in range(total_epochs):
            # Multiplicative LR decay every `decay_epoch` epochs (skip 0).
            if epoch > 0 and epoch % args.decay_epoch == 0:
                current_lr *= decay_factor
                for group in optimizer.param_groups:
                    group['lr'] = current_lr

            # Scheduled-sampling probability: 1.0 -> 0.5 -> 0.25 -> 0.0.
            if epoch < 3:
                p = 1.0
            elif epoch < 6:
                p = 0.5
            elif epoch < 9:
                p = 0.25
            else:
                p = 0.0

            epoch_losses = []
            for step, (feat_maps, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    feat_maps = feat_maps.cuda()
                    gt = gt.cuda()

                model.zero_grad()
                out = model(feat_maps, gt, p)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()

                loss_step = loss.cpu().detach().numpy()
                epoch_losses.append(loss_step)
                print('Epoch ' + str(epoch + 1) + '/' + str(total_epochs) +
                      ' - Step ' + str(step + 1) + '/' +
                      str(len(data_loader)) + " - Loss: " + str(loss_step))

            mean_loss = np.mean(np.array(epoch_losses))
            losses.append(mean_loss)
            print('Total epoch loss: ' + str(mean_loss))

            if epoch > 0 and (epoch + 1) % args.save_epoch == 0:
                checkpoint_name = 'model-epoch-' + str(epoch + 1) + '.pth'
                torch.save(model.state_dict(),
                           os.path.join('models/', checkpoint_name))
    except KeyboardInterrupt:
        pass

    torch.save(model.state_dict(),
               os.path.join('models', 'model-epoch-last.pth'))
    plt.plot(losses)
    plt.show()