transforms.Scale(256), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])) word_embedding = None train_loader = data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.num_threads) model = VisualSemanticEmbedding(args.embed_ndim) if not args.no_cuda: model.cuda() optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, model.parameters()), lr=args.learning_rate) for epoch in range(args.num_epochs): avg_loss = 0 for i, (img, desc, len_desc) in enumerate(train_loader): img = Variable(img.cuda() if not args.no_cuda else img) desc = Variable(desc.cuda() if not args.no_cuda else desc) len_desc, indices = torch.sort(len_desc, 0, True) indices = indices.numpy() img = img[indices, ...] desc = desc[indices, ...].transpose(0, 1)
# NOTE(review): flattened (newline-stripped) fragment of a variant of the same
# training script. It begins mid-call — `word_embedding, args.max_nwords,
# transforms.Compose([...` are trailing arguments to a dataset constructor opened
# before this view — and is truncated right after the length-sort, so the code is
# left byte-identical; only this commentary is added.
# Differences from the sibling chunk that are visible here:
#   * uses `transforms.Resize((224, 224))` (the current torchvision API) instead of
#     the deprecated `Scale` + random crop/flip augmentation,
#   * the DataLoader yields a 4-tuple `(img, seg, desc, len_desc)` — presumably a
#     segmentation mask accompanies each image; TODO confirm against the dataset class,
#   * trainable parameters are selected with a list comprehension rather than
#     `filter(...)` (equivalent behavior).
# `len_desc` is sorted descending with matching `indices` — assumes downstream code
# (outside this view) packs the padded descriptions; verify against the full file.
word_embedding, args.max_nwords, transforms.Compose([ transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])) word_embedding = None train_loader = data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.num_threads) model = VisualSemanticEmbedding(args.embed_ndim) if not args.no_cuda: model.cuda() optimizer = torch.optim.Adam( [x for x in model.parameters() if x.requires_grad], lr=args.learning_rate) for epoch in range(args.num_epochs): avg_loss = 0 for i, (img, seg, desc, len_desc) in enumerate(train_loader): img = Variable(img.cuda() if not args.no_cuda else img) desc = Variable(desc.cuda() if not args.no_cuda else desc) len_desc, indices = torch.sort(len_desc, 0, True)
required=True, help='root directory of output') parser.add_argument('--no_cuda', action='store_true', help='do not use cuda') args = parser.parse_args() if not args.no_cuda and not torch.cuda.is_available(): print('Warning: cuda is not available on this machine.') args.no_cuda = True if __name__ == '__main__': print('Loading a pretrained fastText model...') word_embedding = fasttext.load_model(args.fasttext_model) print('Loading a pretrained model...') txt_encoder = VisualSemanticEmbedding(args.embed_ndim) txt_encoder.load_state_dict(torch.load(args.text_embedding_model)) txt_encoder = txt_encoder.txt_encoder G = Generator() G.load_state_dict(torch.load(args.generator_model)) G.eval() if not args.no_cuda: txt_encoder.cuda() G.cuda() transform = transforms.Compose([ transforms.Scale(74), transforms.CenterCrop(64), transforms.ToTensor()
transforms.Scale(256), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])) word_embedding = None train_loader = data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.num_threads) model = VisualSemanticEmbedding(args.embed_ndim) if not args.no_cuda: model.cuda() optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, model.parameters()), lr=args.learning_rate) for epoch in range(args.num_epochs): avg_loss = 0 for i, (img, desc, len_desc) in enumerate(train_loader): img = Variable(img.cuda() if not args.no_cuda else img) desc = Variable(desc.cuda() if not args.no_cuda else desc) len_desc, indices = torch.sort(len_desc, 0, True) indices = indices.numpy() img = img[indices, ...]