Пример #1
0
def print_caption(model, device, dataset, image_path="child.jpg"):
    """Caption a single image file and print the generated tokens.

    The image is resized to 299x299, converted to a tensor, normalised with
    mean 0.5 / std 0.5 on each of the three channels, and handed to
    ``generate_caption`` as a batch of one.

    Args:
        model: Captioning model; switched to eval mode here and forwarded to
            ``generate_caption``.
        device: Device the image tensor is moved to before inference.
        dataset: Object exposing a ``vocab`` attribute, which is forwarded to
            ``generate_caption`` as the vocabulary.
        image_path: Path of the image to caption. Defaults to ``"child.jpg"``,
            the file name this function previously hard-coded.
    """
    transform = transforms.Compose(
        [
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )
    model.eval()
    # Open inside a context manager so the underlying file handle is released
    # as soon as the pixels have been converted to a tensor.
    with Image.open(image_path) as raw_image:
        test_img = transform(raw_image.convert("RGB")).unsqueeze(0)
    print(generate_caption(model, test_img.to(device), dataset.vocab))
Пример #2
0
def main():
    """Build the WESPE model and run training or testing as configured.

    Everything is read from the module-level ``config``: the tensorboard and
    checkpoint root directories, the ``phone`` sub-directory name, whether to
    use CUDA, the checkpoint iteration to resume from (``load_iter``; ``0``
    means start fresh), and whether to train (``is_train``) or test.
    """
    tensorboard_path = os.path.join(config.tensorboard_dir, config.phone)
    checkpoint_path = os.path.join(config.checkpoint_dir, config.phone)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(tensorboard_path, exist_ok=True)
    os.makedirs(checkpoint_path, exist_ok=True)

    device = torch.device('cuda:0' if config.use_cuda else 'cpu')
    models = WESPE(config).to(device)
    if config.load_iter != 0:
        load_checkpoints(models)

    if config.is_train:
        models.train()
        writer = SummaryWriter(logdir=tensorboard_path)
        try:
            train(models, writer, device)
        finally:
            # Flush and release the event file even if training aborts.
            writer.close()
    else:
        models.eval()
        test(models, device)
Пример #3
0
def generate_caption(model, image, vocabulary, max_length=50):
    """Greedily decode a caption for ``image`` and return it as a list of words.

    The encoder output seeds the decoder LSTM; at every step the highest
    scoring vocabulary index is taken, recorded, embedded and fed back in as
    the next input. Decoding stops after ``max_length`` steps or right after
    the ``"<EOS>"`` token has been produced (that token is kept in the result).

    Args:
        model: Object exposing ``eval()``, ``encoder`` and a ``decoder`` with
            ``lstm``, ``linear`` and ``embedding`` callables.
        image: Input passed to ``model.encoder``. The per-step ``.item()``
            call means a single image (one prediction per step) is expected.
        vocabulary: Object whose ``itos`` maps an integer index to a word.
        max_length: Maximum number of decoding steps.

    Returns:
        List of words looked up in ``vocabulary.itos``, in generation order.
    """
    model.eval()
    token_ids = []

    # Pure inference: gradients are not needed.
    with torch.no_grad():
        step_input = model.encoder(image).unsqueeze(0)
        lstm_state = None  # lets the LSTM start from its default initial state

        for _ in range(max_length):
            hidden, lstm_state = model.decoder.lstm(step_input, lstm_state)
            word_scores = model.decoder.linear(hidden.squeeze(0))
            best = word_scores.argmax(1)

            # Stored as a one-element list per step; unpacked after the loop.
            token_ids.append(best.tolist())

            # The chosen word becomes the decoder input for the next step.
            step_input = model.decoder.embedding(best).unsqueeze(0)

            if vocabulary.itos[best.item()] == "<EOS>":
                break

    # Translate the recorded indices back into words.
    return [vocabulary.itos[int(step_ids[0])] for step_ids in token_ids]
Пример #4
0
def test(model):
    """Run ``model`` over ``test_loader``, score it and plot a few samples.

    Uses the module-level ``device``, ``test_loader``, ``label2image``,
    ``iou`` and ``pic_pred``. For every batch the per-pixel arg-max over
    dimension 1 of the model output is converted with ``label2image`` and
    collected; the collected predictions are then scored against the
    collected targets with ``iou`` and the first four samples are passed to
    ``pic_pred``.

    Args:
        model: Network to evaluate; moved to ``device`` and put in eval mode.

    Returns:
        The value returned by ``iou(seg, tag)`` (previously computed but
        discarded).
    """
    img_list = []
    tag_list = []
    seg_list = []
    with torch.no_grad():
        model.to(device)
        model.eval()
        for img, tag in test_loader:
            img = img.to(device)
            output = model(img)
            label = output.argmax(dim=1)
            # Keep CPU copies only: Tensor.numpy() below does not work on
            # CUDA tensors, and holding every batch on the GPU wastes memory.
            img_list.append(img.cpu())
            tag_list.append(tag)
            seg_list.append(torch.from_numpy(label2image(label.cpu())))
        img = torch.cat(img_list, dim=0)
        tag = torch.cat(tag_list, dim=0)
        seg = torch.cat(seg_list, dim=0)
    score = iou(seg, tag)
    pic_pred(img[0:4].numpy(), tag[0:4].numpy(), seg[0:4].numpy())
    return score
Пример #5
0
            loss = criterion(output_pred, target_output)
            loss.backward()
            optimizer.step()
            topv, topi = output_pred.topk(1)
            check_target = (topi.reshape(-1) == target_output)
            ACC += check_target.float().sum()
            if batchHelper_train._index_in_epoch % batch_size == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.9f} current ACC {} '\
                    .format(y, batchHelper_train._index_in_epoch, totalSample_train,\
                    (batchHelper_train._index_in_epoch * 100.) / totalSample_train, \
                        loss.item(),(check_target.float().sum()/batch_size)*100))

        ############# validation #####################
        batchHelper_val.resetIndex()
        models.eval()
        ACC = 0
        coutBatch = 0
        while batchHelper_val._epochs_completed == 0:
            input_image, labelImage = batchHelper_val.next_batch(
                batch_size, True)
            inputImageDataset = torch.from_numpy(input_image)
            inputImageDataset = inputImageDataset.to(device=device,
                                                     dtype=torch.float)

            target_output = torch.from_numpy(labelImage.astype(int))
            target_output = target_output.to(device=device, dtype=torch.long)
            output_pred = models(inputImageDataset)
            topv, topi = output_pred.topk(1)
            check_target = (topi.reshape(-1) == target_output)
            ACC += check_target.float().sum()
Пример #6
0
 def _freeze_bn(self):
     """Switch every ``nn.BatchNorm2d`` found in ``self.modules()`` to eval mode."""
     batch_norm_layers = (
         layer for layer in self.modules() if isinstance(layer, nn.BatchNorm2d)
     )
     for layer in batch_norm_layers:
         layer.eval()
Пример #7
0
def test_visualisation(model_path, models, test_images_paths, test_dataset, mode ,device):
	'''
	Run the detector over the test images and save copies annotated with
	bounding boxes and class labels.

	Inputs:
	- model_path : path of the saved state dict, read with torch.load
	- models : model instance that receives the loaded state dict
	- test_images_paths : iterable of directories; each one is joined with
	  every file name in test_dataset.images
	- test_dataset : object exposing `images` (file names) and supporting len()
	- mode : converted with str() and used as the sub-directory of
	  results/sample_bbox that receives the saved images
	- device : device the model is moved to and the checkpoint is mapped to

	Outputs:

	Annotated images saved as results/sample_bbox/<mode>/<name>_samplebbox.png.
	Nothing is returned.
	'''
	import random

	label_dict = {1: 'Car', 2: 'Van', 3: 'Truck', 0: 'Background'}
	min_score = 0.5  # detections scoring below this are not drawn

	directory = os.path.join('results', 'sample_bbox', str(mode))

	def save_image(image, directory, filename):
		# exist_ok avoids the check-then-create race of exists() + makedirs().
		os.makedirs(directory, exist_ok=True)
		image.save(os.path.join(directory, filename))

	print(f'Loading model to score images. Scores saved {model_path}') # Testing the model

	# Per-image mean detection scores; collected but not used further in this body.
	mean_test_accuracy = []

	# map_location lets a checkpoint saved on another device load onto `device`.
	model_file = torch.load(model_path, map_location=device)
	models.load_state_dict(model_file)

	print('Model loaded')

	models.to(device)
	models.eval()

	# Loaded lazily, once, the first time a detection is actually drawn.
	font = None

	with torch.no_grad():

		for path in test_images_paths:
			with tqdm(total = len(test_dataset)) as bar:
				for image in test_dataset.images:
					img = os.path.join(path, image)
					# NOTE(review): l_img is passed to the model as returned;
					# presumably load_image already places it on `device` - confirm.
					l_img, pic_image = load_image(img)

					output = models(l_img)

					labels = output[0]['labels']
					scores = output[0]['scores']

					mean_score = scores.mean()

					if torch.isnan(mean_score).any():
						# Image is skipped, but it still counts towards progress.
						bar.update()
						continue

					rect = output[0]['boxes']

					if rect.nelement() != 0:
						text_offset = 0  # vertical position of the next legend line
						int_rects = rect.int().cpu().numpy()
						labels = labels.int().cpu().numpy()
						scores = scores.float().cpu().numpy()
						# One drawing context per image instead of one per detection.
						draw = ImageDraw.Draw(pic_image)

						for int_rect, label, score in zip(int_rects, labels, scores):
							if score < min_score:
								continue

							if font is None:
								font = ImageFont.truetype("bevan.ttf", 20)

							# Random colour shared by the legend line and its box.
							r = random.randint(20,255)
							g = random.randint(20,255)
							b = random.randint(20,255)
							rgb = (r,g,b)

							x0, y0, x1, y1 = int_rect
							draw.text((0, text_offset), f'{label_dict[label]} {score} ', rgb, font=font)
							draw.rectangle([x0, y0, x1, y1], outline = rgb, width = 3)
							text_offset += 20

						# splitext handles extensions of any length (e.g. ".jpeg").
						save_image(pic_image, directory, f'{os.path.splitext(image)[0]}_samplebbox.png')

					mean_score = mean_score.float().cpu().numpy()

					mean_test_accuracy.append(mean_score)
					bar.update()

		print('FINISHED TESTING')