import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.stats import entropy
from torchvision.models import inception_v3

# Third-party text encoders matching the classes used below
from pytorch_pretrained_bert import BertModel, OpenAIGPTModel
from skipthoughts import BayesianUniSkip

# Project-local modules (import paths assumed; adjust to this repo's layout)
from model import Generator, RnnEncoder
from trainer import Trainer


def inception_score(imgs, model_file, cuda=True, batch_size=32, resize=False, splits=1):
    """Computes the inception score of images generated from the captions in imgs

    imgs -- Torch dataset whose batches Trainer.prepare_data() can unpack into
            images, captions, and caption lengths; generated images are 3xHxW,
            normalized in the range [-1, 1]
    model_file -- path to the trained generator/encoder checkpoint
    cuda -- whether or not to run on GPU
    batch_size -- batch size for feeding into Inception v3
    resize -- whether to upsample generated images to 299x299 before scoring
    splits -- number of splits over which to average the score
    """
    N = len(imgs)

    assert batch_size > 0
    assert N > batch_size

    device = torch.device('cuda' if cuda else 'cpu')

    # Set up dtype
    if cuda:
        dtype = torch.cuda.FloatTensor
    else:
        if torch.cuda.is_available():
            print(
                "WARNING: You have a CUDA device, so you should probably set cuda=True"
            )
        dtype = torch.FloatTensor

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs,
                                             batch_size=batch_size,
                                             drop_last=True)

    # Load generator and text embedding model. `args` is assumed to be the
    # argparse namespace parsed at module level in this script.
    if args.use_skip_thought:
        model = BayesianUniSkip('data/skip_thoughts', imgs.word_to_idx.keys())
        for param in model.parameters():
            param.requires_grad = False
    elif args.use_bert:
        model = BertModel.from_pretrained('bert-base-uncased')
        model.eval()
    elif args.use_gpt:
        model = OpenAIGPTModel.from_pretrained('openai-gpt')
        model.eval()
    else:
        model = RnnEncoder(dict_size=len(imgs.word_to_idx),
                           embed_size=args.embed_size,
                           hidden_dim=args.rnn_hidden_dim,
                           drop_prob=0.5)

    generator = Generator().to(device)
    trainer = Trainer(dataloader, model, generator, None, None, None, None,
                      device, None)
    trainer.load_model(model_file)
    trainer.rnn_encoder.eval()
    trainer.generator.eval()

    # Load the pretrained Inception v3 classifier used to score the samples
    inception_model = inception_v3(pretrained=True,
                                   transform_input=False).type(dtype)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear',
                     align_corners=False).type(dtype)

    def get_pred(x):
        # Upsample to Inception's expected 299x299 input, then return the
        # softmax class probabilities p(y|x).
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).detach().cpu().numpy()

    # Get predictions. drop_last=True discards the final partial batch, so
    # only n_used images are scored.
    n_used = (N // batch_size) * batch_size
    preds = np.zeros((n_used, 1000))

    with torch.no_grad():
        for i, batch in enumerate(dataloader, 0):
            print("Calculating Inception Score... iter: {} / {} ".format(
                i, N // batch_size), end='\r')

            # Real images are unused here; only the captions are needed to
            # condition the generator.
            _real_imgs, caps, cap_lens, fake_caps, fake_cap_lens = \
                trainer.prepare_data(batch)

            # Text embedding
            sent_emb, fake_sent_emb = trainer.embed_text(
                caps, cap_lens, fake_caps, fake_cap_lens, batch_size)

            batch_size_i = caps.size()[0]
            sampled = torch.randn((batch_size_i, generator.z_size)).to(device)
            batchv = generator(sent_emb, sampled)

            preds[i * batch_size:i * batch_size + batch_size_i] = get_pred(batchv)

    print()

    # Now compute the mean KL divergence between each conditional p(y|x) and
    # the marginal p(y), exponentiated per split
    split_scores = []

    for k in range(splits):
        part = preds[k * (n_used // splits):(k + 1) * (n_used // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores)
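

# Example usage (a minimal sketch, not part of the original script). The
# dataset class, checkpoint path, and argument values below are assumptions;
# substitute the actual dataset and checkpoint used in this repo.
if __name__ == '__main__':
    # `TextImageDataset` is a hypothetical stand-in for whatever dataset class
    # this repo uses; it must expose `word_to_idx` and yield batches that
    # Trainer.prepare_data() can unpack.
    dataset = TextImageDataset('data/birds', split='test')
    mean_is, std_is = inception_score(dataset,
                                      model_file='checkpoints/generator.pth',
                                      cuda=torch.cuda.is_available(),
                                      batch_size=32,
                                      resize=True,  # generator output is smaller than 299x299
                                      splits=10)
    print('Inception score: {:.3f} +/- {:.3f}'.format(mean_is, std_is))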