import os
import json
import logging

import torch
import torch.nn as nn
from torchvision.models import resnet101

# Project-local imports (assumed; these modules live elsewhere in this repo).
import utils
from models import BimodalDAEImage  # assumed import path for the model class

# The original code calls log.info(); a module-level logger is assumed here.
log = logging.getLogger(__name__)


def model_1(args):
    # Create the per-run output directories if they do not already exist.
    for subdir in ('images', 'history', 'files'):
        os.makedirs(os.path.join(os.getcwd(), 'results', subdir, args.run_name),
                    exist_ok=True)

    datapath = args.datadir
    args.img_size = 224
    dataset, data_loader = utils.get_dataset(datapath, args.img_size,
                                             args.batch_size)
    classes, class_to_idx, idx_to_class = utils.get_classes(dataset)

    word_dim = 300
    label_criterion = nn.CrossEntropyLoss()
    reconstr_criterion = nn.L1Loss()  # nn.MSELoss() is a drop-in alternative

    model = BimodalDAEImage(word_dim, 2048, n_classes=len(classes))
    # NOTE: the model expects 2048-d visual features, so this resnet101 is
    # presumably a variant whose forward returns pooled features rather than
    # torchvision's 1000-way logits.
    cnn = resnet101(pretrained=True)
    cnn.eval()  # feature extractor only; freeze batch-norm statistics

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=1e-5)

    print('\nNum classes: %r, num images: %r' % (len(classes), len(dataset)))

    # Note: the eval pass below uses the older utils.get_wvecs_json() loader.
    word_vecs = utils.get_word_vectors(os.getcwd() + '/data/files/wvecs.json',
                                       classes, word_dim)

    loss_hist, metric_hist = {}, {}

    for epoch in range(args.epochs):
        print('Epoch %r' % epoch)
        log.info('Epoch %r' % epoch)
        loss_hist[epoch], metric_hist[epoch] = {}, {}

        for batch_idx, (img, target_tensor) in enumerate(data_loader):
            # Per-batch stats (collected but not yet populated).
            batch_acc, batch_loss = [], {'reconstr': [], 'classification': []}
            target_idxs = target_tensor.data.numpy().tolist()
            target_names = [idx_to_class[idx] for idx in target_idxs]
            # One-hot labels (previously target dist reps); currently unused.
            target_labels = torch.tensor(
                [[1 if i == idx else 0 for i in range(len(classes))]
                 for idx in target_idxs],
                dtype=torch.long)

            target_textual = torch.tensor(
                [word_vecs[name] for name in target_names],
                dtype=torch.float32)

            # Extract one visual feature vector per image with the frozen CNN;
            # torch.cat yields a (batch, 2048) tensor instead of the original
            # (batch, 1, 2048) numpy round-trip, which broke the loss shapes.
            with torch.no_grad():
                target_visual = torch.cat([
                    cnn(img[idx].reshape(1, 3, args.img_size, args.img_size))
                    for idx in range(len(target_idxs))
                ])

            n_samples = len(target_idxs)

            optimizer.zero_grad()
            img_reconstr, text_reconstr, hidden = model(target_visual,
                                                        target_textual)

            textual_loss = reconstr_criterion(text_reconstr, target_textual)
            visual_loss = reconstr_criterion(img_reconstr, target_visual)
            # CrossEntropyLoss expects raw logits, so pass `hidden` directly;
            # the original applied a softmax first, which double-normalizes.
            pred_loss = label_criterion(hidden, target_tensor)

            # Summing and calling backward() once accumulates the same
            # gradients as the original per-loss backward(retain_graph=True).
            (textual_loss + visual_loss + pred_loss).backward()
            optimizer.step()

        if epoch % 10 == 0:
            state = {'epoch': epoch + 1,
                     'state_dict': model.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state, os.getcwd() + '/model_states/' + args.run_name)
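# `BimodalDAEImage` is imported from elsewhere in this repo and its definition
# is not shown here. The sketch below is a minimal, assumption-laden
# reconstruction consistent with how it is called above: per-modality encoders
# for the 2048-d visual and 300-d textual inputs, a shared hidden layer of size
# n_classes (which is why model_1() feeds it straight into CrossEntropyLoss),
# and per-modality decoders. The 512-unit encoder width and the single shared
# bottleneck are assumptions, not the repo's actual architecture.
class _BimodalDAEImageSketch(nn.Module):
    def __init__(self, text_dim, img_dim, n_classes):
        super().__init__()
        self.img_enc = nn.Sequential(nn.Linear(img_dim, 512), nn.ReLU())
        self.text_enc = nn.Sequential(nn.Linear(text_dim, 512), nn.ReLU())
        # Shared hidden layer doubling as the class-logit layer.
        self.shared = nn.Linear(512 * 2, n_classes)
        self.img_dec = nn.Linear(n_classes, img_dim)
        self.text_dec = nn.Linear(n_classes, text_dim)

    def forward(self, visual, textual):
        joint = torch.cat([self.img_enc(visual), self.text_enc(textual)], dim=1)
        hidden = self.shared(joint)
        # Same triple that model_1()/eval_model_1() unpack.
        return self.img_dec(hidden), self.text_dec(hidden), hidden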
# Renamed from model_1 so it does not shadow the training function above.
def eval_model_1(args):
    for subdir in ('images', 'history', 'files'):
        os.makedirs(os.path.join(os.getcwd(), 'results', subdir, args.run_name),
                    exist_ok=True)

    datapath = args.datadir
    args.img_size = 224
    dataset, data_loader = utils.get_dataset(datapath, args.img_size,
                                             args.batch_size)
    classes, class_to_idx, idx_to_class = utils.get_classes(dataset)

    word_dim = 300
    label_dim = len(classes)

    model = BimodalDAEImage(word_dim, 2048, n_classes=len(classes))
    # Load the checkpoint saved by model_1(); evaluating a freshly initialised
    # model (as the original code did) would produce meaningless encodings.
    state = torch.load(os.getcwd() + '/model_states/' + args.run_name)
    model.load_state_dict(state['state_dict'])
    model.eval()

    cnn = resnet101(pretrained=True)
    cnn.eval()

    print('\nNum classes: %r, num images: %r' % (len(classes), len(dataset)))

    word_vecs = utils.get_wvecs_json(os.getcwd() + '/data/files/wvecs.json',
                                     classes, word_dim)

    # Collect the hidden-layer encoding of every sample, grouped by class name.
    encoding_dict = {}
    with torch.no_grad():
        for batch_idx, (img, target_tensor) in enumerate(data_loader):
            target_idxs = target_tensor.data.numpy().tolist()
            target_names = [idx_to_class[idx] for idx in target_idxs]

            target_textual = torch.tensor(
                [word_vecs[name] for name in target_names],
                dtype=torch.float32)
            target_visual = torch.cat([
                cnn(img[idx].reshape(1, 3, args.img_size, args.img_size))
                for idx in range(len(target_idxs))
            ])

            img_reconstr, text_reconstr, hidden = model(target_visual,
                                                        target_textual)

            reps = hidden.data.numpy()
            for idx in range(len(reps)):
                target = target_names[idx]
                encoding_dict.setdefault(target, []).append(reps[idx].tolist())

    with open(os.path.join(os.getcwd(), 'results', 'files', args.run_name,
                           'encoding_dict.json'), 'w+') as f:
        json.dump(encoding_dict, f)

    print('Eval done!')
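# A minimal driver sketch showing how these functions are presumably invoked.
# The flag names mirror the args.* attributes used above; the default values
# and the --eval switch are assumptions, not the repo's actual entry point.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--datadir', required=True)
    parser.add_argument('--run_name', required=True)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--eval', action='store_true',
                        help='extract encodings instead of training')
    cli_args = parser.parse_args()

    if cli_args.eval:
        eval_model_1(cli_args)
    else:
        model_1(cli_args)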