def get_algorithm(self, words, model_dir=None):
    """Return the configured skip-thoughts encoder (UniSkip or BiSkip)."""
    if model_dir is None:
        model_dir = self.model_dir
    if self.algorithm == 'uniskip':
        return UniSkip(model_dir, words)
    else:
        return BiSkip(model_dir, words)
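# Usage sketch (illustrative, not from the original code): get_algorithm
# only touches `self.algorithm` and `self.model_dir`, so a SimpleNamespace
# can stand in for the host object; the directory path and word list are
# assumptions for the example.
from types import SimpleNamespace

_host = SimpleNamespace(model_dir='./data/skip-thoughts', algorithm='uniskip')
_words = ['<start>', 'a', 'small', 'bird', '<end>']
_encoder = get_algorithm(_host, _words)  # returns a UniSkip instance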
def get_text_enc(config, vocab):
    """Return the skip-thoughts text encoder named in the config."""
    skipthoughts_dir, text_enc = config['skipthoughts_dir'], config['txt_enc']
    if text_enc == 'BayesianUniSkip':
        return BayesianUniSkip(skipthoughts_dir, vocab)
    if text_enc == 'UniSkip':
        return UniSkip(skipthoughts_dir, vocab)
    if text_enc == 'BiSkip':
        return BiSkip(skipthoughts_dir, vocab)
    if text_enc == 'DropUniSkip':
        return DropUniSkip(skipthoughts_dir, vocab)
    # Fail loudly instead of silently returning None on a bad config.
    raise ValueError('Unknown text encoder: {}'.format(text_enc))
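# Usage sketch (illustrative): the dict mirrors the two keys that
# get_text_enc reads; the directory path and word list are assumptions.
_config = {'skipthoughts_dir': './data/skip-thoughts', 'txt_enc': 'BiSkip'}
_vocab = ['<start>', 'a', 'small', 'bird', '<end>']
_text_enc = get_text_enc(_config, _vocab)  # returns a BiSkip instance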
import os
import pickle

import pandas as pd
import torch
from torchvision import transforms

# Project-local imports (modules not shown in this excerpt): T_Att,
# DecoderRNN, get_loader, to_var, and UniSkip from skip-thoughts.torch.


def main(args):
    # Create the model directory if it does not exist.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing.
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load the vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load the word list expected by UniSkip.
    vocab_list = pd.read_csv("./data/vocab_list.csv", header=None)
    vocab_list = vocab_list.values.tolist()[0]

    # Build the data loader.
    data_loader = get_loader(args.image_dir, args.img_embeddings_dir,
                             args.data_path, vocab, transform,
                             args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Build the models and put the decoder in evaluation mode.
    attention = T_Att()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers, args.dropout)
    uniskip = UniSkip('./data/skip-thoughts', vocab_list)
    decoder.eval()

    if torch.cuda.is_available():
        attention.cuda()
        decoder.cuda()
        uniskip.cuda()

    # Restore the trained weights.
    attention.load_state_dict(torch.load(args.attention_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    for i, (images, captions, cap_lengths, qa, qa_lengths,
            vocab_words) in enumerate(data_loader):
        # Set up the mini-batch; inference only, so no gradients needed.
        img_embeddings = to_var(images.data, volatile=True)
        captions = to_var(captions)

        # Encode captions with skip-thoughts, attend over the image
        # embeddings, and sample a question-answer sequence.
        cap_embeddings = uniskip(captions, cap_lengths).data
        img_embeddings = img_embeddings.data
        ctx_vec = attention(img_embeddings, cap_embeddings)
        outputs = decoder.sample(ctx_vec)
        output_ids = outputs.cpu().data.numpy()

        # Decode the sampled ids and the first ground-truth QA sequence
        # back into words for a side-by-side comparison.
        qa = qa.numpy()[0]
        sample = ' '.join(vocab.idx2word[word_id] for word_id in output_ids)
        actual = ' '.join(vocab.idx2word[word_id] for word_id in qa)
        print("actual_qa : " + actual + " | predicted_qa : " + sample)
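# The to_var helper used above is not shown in this excerpt. A minimal
# sketch, assuming the common pre-0.4 PyTorch idiom (a guess at the
# project's helper, not its confirmed implementation):
import torch
from torch.autograd import Variable


def to_var(x, volatile=False):
    # Move the tensor to the GPU when one is available, then wrap it in
    # a Variable; volatile=True disables gradient tracking for inference.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)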
    # Add the words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab


vocab = build_vocab()
with open('birdsdataset/birds_vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)

# Build the UniSkip encoder over every word in the vocabulary.
all_words_in_vocab = vocab.word2idx.keys()
uniskip = UniSkip(dir_st, all_words_in_vocab)


def get_ids(tokens, vocab):
    # Prepend <start> and append <end> to every sequence of token ids.
    ids = [vocab('<start>')]
    for word in tokens:
        ids.append(vocab(word))
    ids.append(vocab('<end>'))
    return ids


for _class in sorted(os.listdir(embedding_path)):
    split = ''
    if _class in train_classes:
        split = train
    elif _class in val_classes:
        split = val
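# Quick check of get_ids with a stub vocabulary (illustrative only; the
# real Vocabulary class maps words to ids via __call__, as used above).
class _StubVocab:
    def __init__(self):
        self.word2idx = {'<start>': 0, '<end>': 1, 'a': 2, 'bird': 3}

    def __call__(self, word):
        return self.word2idx.get(word, len(self.word2idx))


assert get_ids(['a', 'bird'], _StubVocab()) == [0, 2, 3, 1]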
import os
import pickle

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence
from torchvision import transforms
from tqdm import tqdm

# Project-local imports (modules not shown in this excerpt): T_Att,
# DecoderRNN, get_loader, to_var, and UniSkip from skip-thoughts.torch.


def main(args):
    # Create the model directory if it does not exist.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing.
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load the vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load the word list expected by UniSkip.
    vocab_list = pd.read_csv("./data/vocab_list.csv", header=None)
    vocab_list = vocab_list.values.tolist()[0]

    # Build the data loader.
    data_loader = get_loader(args.image_dir, args.img_embeddings_dir,
                             args.data_path, vocab, transform,
                             args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Build the models.
    attention = T_Att()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers, args.dropout)
    uniskip = UniSkip('./data/skip-thoughts', vocab_list)

    if torch.cuda.is_available():
        attention.cuda()
        decoder.cuda()
        uniskip.cuda()

    # Loss and optimizer; only the attention module and decoder train.
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(attention.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models.
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, cap_lengths, qa, qa_lengths,
                vocab_words) in enumerate(tqdm(data_loader)):
            # Re-initialize the decoder hidden state for each batch.
            decoder.hidden = decoder.init_hidden()

            # Set up the mini-batch; image embeddings are fixed inputs.
            img_embeddings = to_var(images.data, volatile=True)
            captions = to_var(captions)
            qa = to_var(qa)
            targets = pack_padded_sequence(qa, qa_lengths,
                                           batch_first=True)[0]

            # Forward, backward, and optimize.
            decoder.zero_grad()
            attention.zero_grad()
            cap_embeddings = uniskip(captions, cap_lengths).data
            img_embeddings = img_embeddings.data
            ctx_vec = attention(img_embeddings, cap_embeddings)
            outputs = decoder(ctx_vec, qa, qa_lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info.
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, '
                      'Perplexity: %5.4f'
                      % (epoch, args.num_epochs, i, total_step,
                         loss.data[0], np.exp(loss.data[0])))

            # Save the model checkpoints.
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl'
                                        % (epoch + 1, i + 1)))
                torch.save(attention.state_dict(),
                           os.path.join(args.model_path,
                                        'attention-%d-%d.pkl'
                                        % (epoch + 1, i + 1)))
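# The excerpt does not show the script entry point. A minimal argparse
# harness consistent with the training loop above; every flag name is
# inferred from the `args.` usages, and all defaults are illustrative
# assumptions, not the project's actual values.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='./models/')
    parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl')
    parser.add_argument('--image_dir', type=str, default='./data/images/')
    parser.add_argument('--img_embeddings_dir', type=str,
                        default='./data/img_embeddings/')
    parser.add_argument('--data_path', type=str, default='./data/qa.json')
    parser.add_argument('--embed_size', type=int, default=256)
    parser.add_argument('--hidden_size', type=int, default=512)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_workers', type=int, default=2)
    parser.add_argument('--num_epochs', type=int, default=5)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--log_step', type=int, default=10)
    parser.add_argument('--save_step', type=int, default=1000)
    main(parser.parse_args())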