def main():
    """Train a character-level RNN language model and save its weights.

    Relies on module-level globals: ``chars`` (character vocabulary string),
    ``book`` (training text), the ``RNN`` class, and the helpers
    ``getSequence`` and ``test_words``.
    """
    epochs = 301
    seq_batch_size = 200   # characters drawn per training sequence
    print_yes = 100        # log and sample every `print_yes` epochs
    iscuda = False

    # Create our network, optimizer and loss function.
    net = RNN(len(chars), 100, 150, 2)  # instantiate a RNN object
    optim = torch.optim.Adam(net.parameters(), lr=6e-4)
    loss_func = torch.nn.functional.nll_loss
    if iscuda:
        net = net.cuda()

    # Main training loop: one randomly drawn sequence per "epoch".
    for epoch in range(epochs):
        dat = getSequence(book, seq_batch_size)
        # Map each character to its vocabulary index.
        # NOTE: str.find returns -1 for characters absent from `chars`;
        # assumes every character of `book` occurs in `chars`.
        dat = torch.LongTensor([chars.find(item) for item in dat])

        # Inputs are chars [0, n-1]; targets are the next chars [1, n].
        x_t, y_t = dat[:-1], dat[1:]
        hidden = net.init_hidden()
        if iscuda:
            x_t, y_t, hidden = x_t.cuda(), y_t.cuda(), hidden.cuda()

        # Forward pass — call the module itself (not .forward) so hooks run.
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)

        # Backprop and parameter update.
        optim.zero_grad()
        loss.backward()
        optim.step()

        # Print the loss for every kth iteration.
        if epoch % print_yes == 0:
            print('*' * 60)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss.item()))
            print('sample speech:\n', test_words(net, chars, seq_batch_size))

    torch.save(net.state_dict(), 'trainedBook_v2.pt')
# Build train/validation loaders over the same dataset, split via samplers.
# shuffle must be False because the samplers control ordering.
loader_train = DataLoader(train_set, batch_size=BATCH_SIZE, sampler=train_sampler, shuffle=False, num_workers=4)
loader_val = DataLoader(train_set, batch_size=BATCH_SIZE, sampler=validation_sampler, shuffle=False, num_workers=4)
print("Train Size: ", len(loader_train.sampler.indices))
print("Validation Size: ", len(loader_val.sampler.indices))

# One output unit per label class known to the dataset's label encoder.
num_classes = len(train_set.label_encoder.classes_)
model = RNN(embeddings, num_classes=num_classes, **_hparams)

# Per-class weights to counter label imbalance (note: helper is spelled
# `class_weigths` at its definition site).
weights = class_weigths(train_set.labels)
if torch.cuda.is_available():
    model.cuda()
    weights = weights.cuda()
criterion = torch.nn.CrossEntropyLoss(weight=weights)

# Optimize only trainable parameters (frozen embeddings are excluded).
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=lr)

#############################################################
# Train
#############################################################
best_val_loss = None
# Column headers for the loss / accuracy history tables used below.
colms_l = ['Train_Loss', 'Val_Loss']
colms_acc = ['Train_Acc', 'Val_Acc']
def main(argv):
    """End-to-end training of an RNN on pickled visit-sequence data.

    Loads train/valid/test splits from ``args.data_path``, trains with SGD +
    ReduceLROnPlateau, checkpoints the best-validation model, and reports
    test AUROC/AUPR evaluated at the best-validation epochs.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Leave one core free; fall back to 1 on single-core machines.
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1

    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    # random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading: each split is a pair of pickle files (sequences, labels).
    print('===> Loading entire datasets')
    with open(args.data_path + 'train.seqs', 'rb') as f:
        train_seqs = pickle.load(f)
    with open(args.data_path + 'train.labels', 'rb') as f:
        train_labels = pickle.load(f)
    with open(args.data_path + 'valid.seqs', 'rb') as f:
        valid_seqs = pickle.load(f)
    with open(args.data_path + 'valid.labels', 'rb') as f:
        valid_labels = pickle.load(f)
    with open(args.data_path + 'test.seqs', 'rb') as f:
        test_seqs = pickle.load(f)
    with open(args.data_path + 'test.labels', 'rb') as f:
        test_labels = pickle.load(f)

    # Vocabulary size = largest code seen anywhere, plus one (codes are
    # used as 0-based indices). Sequences are lists of visits; visits are
    # lists of integer codes.
    max_code = max(
        map(lambda p: max(map(lambda v: max(v), p)),
            train_seqs + valid_seqs + test_seqs))
    num_features = max_code + 1

    print(" ===> Construct train set")
    train_set = VisitSequenceWithLabelDataset(train_seqs, train_labels, num_features, reverse=False)
    print(" ===> Construct validation set")
    valid_set = VisitSequenceWithLabelDataset(valid_seqs, valid_labels, num_features, reverse=False)
    print(" ===> Construct test set")
    test_set = VisitSequenceWithLabelDataset(test_seqs, test_labels, num_features, reverse=False)

    train_loader = DataLoader(dataset=train_set, batch_size=args.batch_size, shuffle=True,
                              collate_fn=visit_collate_fn, num_workers=args.threads)
    valid_loader = DataLoader(dataset=valid_set, batch_size=args.eval_batch_size, shuffle=False,
                              collate_fn=visit_collate_fn, num_workers=args.threads)
    test_loader = DataLoader(dataset=test_set, batch_size=args.eval_batch_size, shuffle=False,
                             collate_fn=visit_collate_fn, num_workers=args.threads)
    print('===> Dataset loaded!')

    # Create model
    print('===> Building a Model')
    model = RNN(dim_input=num_features, dim_emb=128, dim_hidden=128)
    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # Class weights: weight for class 0 is the mean label (i.e. the fraction
    # of positives), so the rarer class gets the larger weight.
    weight_class0 = torch.mean(torch.FloatTensor(train_set.labels))
    weight_class1 = 1.0 - weight_class0
    weight = torch.FloatTensor([weight_class0, weight_class1])
    criterion = nn.CrossEntropyLoss(weight=weight)
    if args.cuda:
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                                nesterov=False, weight_decay=args.weight_decay)
    # Reduce the learning rate when validation loss plateaus.
    scheduler = ReduceLROnPlateau(optimizer, 'min')

    best_valid_epoch = 0
    best_valid_loss = sys.float_info.max
    train_losses = []
    valid_losses = []

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    for ei in trange(args.epochs, desc="Epochs"):
        # Train
        _, _, train_loss = rnn_epoch(train_loader, model, criterion=criterion,
                                     optimizer=optimizer, train=True)
        train_losses.append(train_loss)

        # Eval
        _, _, valid_loss = rnn_epoch(valid_loader, model, criterion=criterion)
        valid_losses.append(valid_loss)
        scheduler.step(valid_loss)

        is_best = valid_loss < best_valid_loss
        if is_best:
            best_valid_epoch = ei
            best_valid_loss = valid_loss

            # evaluate on the test set only when validation improves, so the
            # reported test metrics correspond to the best-validation model.
            test_y_true, test_y_pred, test_loss = rnn_epoch(
                test_loader, model, criterion=criterion)
            if args.cuda:
                test_y_true = test_y_true.cpu()
                test_y_pred = test_y_pred.cpu()
            # Column 1 of the predictions is taken as P(class 1).
            test_auc = roc_auc_score(test_y_true.numpy(), test_y_pred.numpy()[:, 1],
                                     average="weighted")
            test_aupr = average_precision_score(test_y_true.numpy(), test_y_pred.numpy()[:, 1],
                                                average="weighted")

            # Overwrite the result file on every improvement; at this point
            # ei == best_valid_epoch and valid_loss == best_valid_loss.
            with open(args.save + 'train_result.txt', 'w') as f:
                f.write('Best Validation Epoch: {}\n'.format(ei))
                f.write('Best Validation Loss: {}\n'.format(valid_loss))
                f.write('Train Loss: {}\n'.format(train_loss))
                f.write('Test Loss: {}\n'.format(test_loss))
                f.write('Test AUROC: {}\n'.format(test_auc))
                f.write('Test AUPR: {}\n'.format(test_aupr))

            torch.save(model, args.save + 'best_model.pth')
            torch.save(model.state_dict(), args.save + 'best_model_params.pth')

        # plot
        # NOTE(review): reconstructed as per-epoch plotting (overwrites the
        # same .eps each epoch) — confirm nesting against the original layout.
        if args.plot:
            plt.figure(figsize=(12, 9))
            plt.plot(np.arange(len(train_losses)), np.array(train_losses), label='Training Loss')
            plt.plot(np.arange(len(valid_losses)), np.array(valid_losses), label='Validation Loss')
            plt.xlabel('epoch')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.tight_layout()
            plt.savefig(args.save + 'loss_plot.eps', format='eps')
            plt.close()

    # NOTE(review): test_auc/test_aupr/test_loss are unbound here if
    # validation never improved (cannot happen given the float-max init,
    # but worth keeping in mind if the init changes).
    print('Best Validation Epoch: {}\n'.format(best_valid_epoch))
    print('Best Validation Loss: {}\n'.format(best_valid_loss))
    print('Train Loss: {}\n'.format(train_loss))
    print('Test Loss: {}\n'.format(test_loss))
    print('Test AUROC: {}\n'.format(test_auc))
    print('Test AUPR: {}\n'.format(test_aupr))
# Collect every hyperparameter and object needed to resume training into one
# dict, which is pickled below so a later run can restore the exact setup.
params = {'LR': LR, 'VOCAB_SIZE': VOCAB_SIZE,
          'NO_WORD_EMBEDDINGS': NO_WORD_EMBEDDINGS, 'HIDDEN_SIZE': HIDDEN_SIZE,
          'BATCH_SIZE': BATCH_SIZE, 'NUM_LAYERS': NUM_LAYERS,
          'train_imagepaths_and_captions': train_imagepaths_and_captions,
          'val_imagepaths_and_captions': val_imagepaths_and_captions,
          'pretrained_cnn_dir': pretrained_cnn_dir,
          'pretrained_word_embeddings_file': pretrained_word_embeddings_file,
          'transform_train': transform_train, 'transform_val': transform_val,
          'WEIGHT_DECAY': WEIGHT_DECAY, 'ADAM_FLAG': ADAM_FLAG,
          'RNN_DROPOUT': RNN_DROPOUT, 'CNN_DROPOUT': CNN_DROPOUT,
          'GRAD_CLIP': GRAD_CLIP}

print('Initializing models...')
# Frozen pretrained CNN encoder; trainable RNN decoder over word embeddings.
encoder = CNN(NO_WORD_EMBEDDINGS, pretrained_cnn_dir, freeze=True,
              dropout_prob=CNN_DROPOUT, model_name='resnet152')
decoder = RNN(VOCAB_SIZE, NO_WORD_EMBEDDINGS, hidden_size=HIDDEN_SIZE,
              num_layers=NUM_LAYERS,
              pre_trained_file=pretrained_word_embeddings_file,
              freeze=False, dropout_prob=RNN_DROPOUT)
params['encoder'] = encoder
params['decoder'] = decoder
encoder.cuda()
decoder.cuda()

print('Initializing optimizer...')
# One optimizer over both modules (frozen CNN params simply get no grads).
model_paras = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(model_paras, lr=LR, weight_decay=WEIGHT_DECAY)
params['optimizer'] = optimizer

# Use a context manager so the file handle is closed even if pickling fails
# (the original leaked the handle from `open(...)` passed inline).
with open(init_params_file, 'wb') as _params_f:
    pickle.dump(params, _params_f)

# initialize accumulators.
current_epoch = 1
batch_step_count = 1
time_used_global = 0.0
checkpoint = 1
class TextClassifier:
    """Text classification harness: builds data iterators and an RNN model,
    then trains with plain SGD and tracks train/val/test metrics."""

    def __init__(self, batch_size, iterations, initial_lr, hidden_size, dropout, kernel_sz, num_layers):
        # Device selection: first GPU if available, else CPU.
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        # Build the dataset and the three split iterators.
        self.data = DataReader()
        train_iter, val_iter, test_iter = self.data.init_dataset(
            batch_size, ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(train_iter, 'text', 'label')
        self.val_batch_loader = BatchGenerator(val_iter, 'text', 'label')
        self.test_batch_loader = BatchGenerator(test_iter, 'text', 'label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr

        # Create Model (vocab size and embedding dim come from the
        # pretrained vectors; kernel_sz is only used by the CNN variant
        # kept commented out below).
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        # padding = (math.floor(kernel_sz / 2), 0)
        # self.model = CNN(emb_size=emb_size, emb_dimension=emb_dim,
        #                  output_size=len(self.data.LABEL.vocab),
        #                  dropout=dropout, kernel_sz=kernel_sz, stride=1, padding=padding,
        #                  out_filters=hidden_size, pretrained_emb=self.data.TEXT.vocab.vectors)
        self.model = RNN(emb_size=emb_size, emb_dimension=emb_dim,
                         pretrained_emb=self.data.TEXT.vocab.vectors,
                         output_size=len(self.data.LABEL.vocab),
                         num_layers=num_layers,
                         hidden_size=hidden_size,
                         dropout=dropout)
        if self.use_cuda:
            self.model.cuda()

    def train(self, min_stride=3):
        """Run `self.iterations` epochs; returns loss/accuracy histories.

        Model's forward is expected to return (loss, logits)."""
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []
        best_score = 0.0
        loss = 0.0
        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            # NOTE(review): a fresh SGD optimizer is constructed every
            # iteration at the same initial_lr — harmless for plain SGD but
            # would reset state for momentum/Adam; consider hoisting.
            optimizer = optim.SGD(self.model.parameters(), lr=self.initial_lr)
            self.model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0
            data_iter = iter(self.train_batch_loader)
            for i in range(len(self.train_batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(data_iter)
                # if torch.min(x_len_batch) > min_stride:
                optimizer.zero_grad()
                loss, logits = self.model.forward(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1
                loss.backward()
                optimizer.step()
            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.train_data))
            val_loss, val_acc = self.eval_model(self.val_batch_loader, len(self.data.val_data))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)
            # Only re-run the test set when validation accuracy improves.
            if val_acc > best_score:
                best_score = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.test_data))
            print("Train: {Loss: " + str(total_loss / steps) + ", Acc: " + str(total_acc / len(self.data.train_data)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) + " }")
            # NOTE(review): appends the *last computed* test_acc every
            # iteration; if val_acc never exceeds 0.0 on the first
            # iteration this raises NameError — structure reconstructed
            # from flattened source, confirm intended nesting.
            test_acc_hist.append(test_acc)
        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc_hist

    def eval_model(self, batch_loader, N, min_stride=3):
        """Evaluate on one split; returns (mean loss over N, accuracy over N)."""
        self.model.eval()
        total_loss = 0.0
        total_acc = 0.0
        steps = 0
        batch_iterator = iter(batch_loader)
        with torch.no_grad():
            for i in range(len(batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(batch_iterator)
                # if torch.min(x_len_batch) > min_stride:
                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
        return (total_loss / N), (total_acc / N)
def main(args):
    """Train a CNN encoder + RNN decoder image-captioning model on COCO.

    Written against pre-0.4 PyTorch (Variable/volatile API). Periodically
    validates, prints a sampled caption, and checkpoints; always saves on
    exit via the finally block.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing (ImageNet normalization constants)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'
    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)

    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)

    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir

    encoder = CNN(embed_size)
    decoder = RNN(embed_size, num_hiddens, len(vocab), 1, rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    if args.checkpoint_file:
        # Resume: restore weights, optimizer and loss history from checkpoint.
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        # Only the decoder plus the encoder's trainable head are optimized
        # (the CNN backbone is presumably frozen pretrained weights).
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    if args.sample:
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):
            for step, (images, captions, lengths) in enumerate(train_loader, start=initial_step):
                # Set mini-batch dataset
                # NOTE(review): volatile=True on *training* images disables
                # gradient tracking in pre-0.4 PyTorch — verify utils.to_var
                # actually honors this flag, otherwise backward would fail.
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                # Flatten padded captions to the packed target sequence.
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPU
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)

                train_loss = criterion(outputs, targets)
                # NOTE(review): .data[0] is the pre-0.4 scalar accessor; on
                # modern PyTorch use .item() instead.
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions, lengths) in enumerate(val_loader):
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)
                        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])
                    losses_val.append(np.mean(batch_loss_val))

                    # predict a caption from the last validation batch
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step, epoch,
                                      losses_train, losses_val, checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))
    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        # NOTE(review): step/epoch are unbound here if an exception fires
        # before the first loop iteration (e.g. empty train_loader).
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
def main(args):
    """Caption every image found under ``args.image_path`` with a trained
    CNN encoder + RNN decoder, writing COCO-style JSON results.

    The model weights are restored from ``args.checkpoint_file``; results
    are dumped to 'captions_model.json' even on Ctrl-C.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 2

    # Image Preprocessing — must mirror the training-time transform.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])

    vocab = load_vocab()
    loader = get_basic_loader(dir_path=os.path.join(args.image_path),
                              transform=transform,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    # Build the models and restore the trained weights.
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    encoder = CNN(embed_size)
    decoder = RNN(embed_size, num_hiddens, len(vocab), 1, rec_unit=args.rec_unit)
    encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
        args.checkpoint_file)
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Initialized before the try so the finally-block dump can never hit an
    # unbound name, whatever goes wrong inside.
    results = []
    try:
        with torch.no_grad():
            for step, (images, image_ids) in enumerate(tqdm(loader)):
                images = utils.to_var(images)
                features = encoder(images)
                captions = beam_sample(decoder, features)
                # captions = decoder.sample(features)
                captions = captions.cpu().data.numpy()
                captions = [
                    utils.convert_back_to_text(cap, vocab) for cap in captions
                ]
                captions_formatted = [{
                    'image_id': int(img_id),
                    'caption': cap
                } for img_id, cap in zip(image_ids, captions)]
                results.extend(captions_formatted)
                print('Sample:', captions_formatted[0])
    except KeyboardInterrupt:
        print('Ok bye!')
    finally:
        # Persist whatever was produced, even if interrupted mid-run.
        import json
        file_name = 'captions_model.json'
        with open(file_name, 'w') as f:
            json.dump(results, f)
def prepare_datasets():
    """
    Prepares the training, validation and test (Kaggle) datasets used by the XGBoost model \n
    This function looks into the XGBOOST_CONFIG dictionary in config.py for the following information: \n
    * which embedding NN to use (looks for the .pth checkpoint in XGBOOST_CONFIG['embedder'])
    * how to extract the embedding: XGBOOST_CONFIG['embedding_use_hidden', 'embedding_use_output', 'embedding_size']
    * how many numeric variables to add as input
    * where to dump the prepared .npy files: XGBOOST_CONFIG['train_file', 'val_file', 'test_file']
    """
    # Load the frozen embedding network from its checkpoint (eval mode:
    # disables dropout/batch-norm updates; gradients are still computed but
    # discarded via .detach() below).
    checkpoint = torch.load(XGBOOST_CONFIG['embedder'])
    embed = RNN(config=checkpoint['net_config']).eval()
    embed.load_state_dict(checkpoint['model'])
    embed = embed.cuda()

    annotated_dataset = TweetDataset(dataset_type='all')
    test_dataset = TweetDataset(dataset_type='test')

    def get_data(dataset, message):
        # Builds an (N, numeric + embedding + 1) matrix: numeric features,
        # then the extracted embedding, then the target in the last column.
        N = len(dataset)
        data = np.zeros((N, XGBOOST_CONFIG['numeric_data_size'] + XGBOOST_CONFIG['embedding_size'] + 1))  # 1 for answer
        loader = DataLoader(dataset, batch_size=TRAIN_CONFIG['batch_size'],
                            num_workers=TRAIN_CONFIG['workers'],
                            collate_fn=collate_function, shuffle=False)
        current_idx = 0
        n = len(loader)
        print('')
        for batch_index, batch in enumerate(loader):
            printProgressBar(batch_index, n, prefix=message)
            batch_size = batch['numeric'].shape[0]
            numeric = batch['numeric'].cuda()
            text = batch['embedding'].cuda()
            # Three extraction modes, selected by config:
            if XGBOOST_CONFIG['embedding_use_hidden']:
                # 1) the network's hidden state (second output)
                embedding = embed(
                    text,
                    numeric[:, :checkpoint['net_config']['numeric_data_size']]
                )[1]
            elif XGBOOST_CONFIG['embedding_use_output']:
                # 2) the network's output, mapped back from log space
                #    (exp(x) - 1 inverts a log1p-style target transform —
                #    presumably matches the embedder's training; verify).
                embedding = torch.exp(
                    embed(
                        text,
                        numeric[:, :checkpoint['net_config']
                                ['numeric_data_size']])[0]) - 1
            else:
                # 3) expecting a built-in embedding layer -> taking the mean of the embeddings
                embedding = embed.emb(text).mean(axis=1)
            data[current_idx:current_idx+batch_size, XGBOOST_CONFIG['numeric_data_size']:-1] = \
                embedding.detach().cpu().numpy()
            data[current_idx:current_idx+batch_size, :XGBOOST_CONFIG['numeric_data_size']] = \
                numeric.detach().cpu().numpy()
            data[current_idx:current_idx + batch_size, -1] = batch['target'].numpy()
            current_idx += batch_size
        return data

    annotated_data = get_data(annotated_dataset, "Preparing train.csv ...")
    # Train/val split by fraction of the annotated data.
    # NOTE(review): [1:split] silently drops row 0 from the training file —
    # confirm this is intentional and not an off-by-one ([:split]).
    split = int(len(annotated_dataset) * DATASET_CONFIG['train_percent'])
    np.save(XGBOOST_CONFIG['train_file'], annotated_data[1:split])
    np.save(XGBOOST_CONFIG['val_file'], annotated_data[split:])

    test_data = get_data(test_dataset, "Preparing evaluation.csv ...")
    # Append the Kaggle row ids (first CSV column, header skipped) as a
    # final column so predictions can be matched back to submissions.
    with open(DATASET_CONFIG['test_csv_relative_path'], newline='') as csvfile:
        ids = [line[0] for line in list(csv.reader(csvfile))[1:]]
    ids = np.array(ids).reshape(np.shape(ids)[0], 1)
    prepared_test_data = np.concatenate((test_data, ids), axis=1)
    np.save(XGBOOST_CONFIG['test_file'], prepared_test_data)