Example #1
def draw(scores, classification, transformed_anchors, img, labels, writer):
    """show the result of object detection."""
    unnormalize = UnNormalizer()
    idxs = np.where(scores > 0.5)
    img = np.array(255 * unnormalize(img)).copy()

    img[img < 0] = 0
    img[img > 255] = 255

    img = np.transpose(img, (1, 2, 0))

    img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    for j in range(idxs[0].shape[0]):
        bbox = transformed_anchors[idxs[0][j], :]
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        label_name = labels[int(classification[idxs[0][j]])]
        score = scores[idxs[0][j]]
        draw_caption(img, (x1, y1, x2, y2),
                     '{}:{:.2f}'.format(label_name, score.item()))

        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

        writer.add_image(label_name, img.transpose(2, 0, 1))

    return
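A minimal sketch of how draw might be wired into a validation loop that logs to TensorBoard. It assumes the pytorch-retinanet style collater and dataset used throughout these examples, plus the draw helper above; the log directory and image count are placeholders.

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from dataloader import collater  # module layout assumed from the examples below

def log_sample_detections(retinanet, dataset_val, labels, n_images=5):
    writer = SummaryWriter(log_dir='runs/detections')  # hypothetical log directory
    loader = DataLoader(dataset_val, batch_size=1, collate_fn=collater)
    retinanet.eval()
    with torch.no_grad():
        for idx, data in enumerate(loader):
            if idx >= n_images:
                break
            scores, classification, transformed_anchors = retinanet(data['img'].float())
            # draw() expects the raw model outputs plus a single CHW image tensor
            draw(scores, classification, transformed_anchors, data['img'][0], labels, writer)
    writer.close()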
Example #2
def detect_single_image(checkpoint, image_path, visualize=False):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')
    configs = deepcopy(checkpoint['model_specs']['training_configs'])
    configs.update(checkpoint['hp_values'])  # dict.update() mutates in place and returns None
    labels = checkpoint['labels']
    num_classes = len(labels)
    retinanet = ret50(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios']) #TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()

    img = skimage.io.imread(image_path)

    if len(img.shape) == 2:
        img = skimage.color.gray2rgb(img)

    img = img.astype(np.float32) / 255.0
    transform = transforms.Compose([Normalizer(), Resizer(min_side=608)]) #TODO: make this dynamic
    data = transform({'img': img, 'annot': np.zeros((0, 5))})
    img = data['img']
    img = img.unsqueeze(0)
    img = img.permute(0, 3, 1, 2)
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(img.to(device=device).float())


        idxs = np.where(scores.cpu() > 0.5)[0]
        scale = data['scale']
        detections_list = []
        for j in range(idxs.shape[0]):
            bbox = transformed_anchors[idxs[j], :]
            label_idx = int(classification[idxs[j]])
            label_name = labels[label_idx]
            score = scores[idxs[j]].item()

            # undo the resize so coordinates match the original image (for eval against GT)
            bbox /= scale
            bbox = bbox.round()  # torch.Tensor.round() is not in-place
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
        img_name = image_path.split('/')[-1].split('.')[0]
        filename = img_name + '.txt'
        path = os.path.dirname(image_path)
        filepathname = os.path.join(path, filename)
        with open(filepathname, 'w', encoding='utf8') as f:
            for single_det_list in detections_list:
                for i, x in enumerate(single_det_list):
                    f.write(str(x))
                    f.write(' ')
                f.write('\n')

        if visualize:
            unnormalize = UnNormalizer()  # NOTE: visualization is not actually implemented in this example


    return filepathname
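For context, a hypothetical call site for detect_single_image; the checkpoint path and sample image are placeholders, and the checkpoint layout (model_specs, hp_values, labels, model) is the one the function above expects.

import torch

checkpoint = torch.load('checkpoints/retinanet_best.pt', map_location='cpu')  # hypothetical path
result_file = detect_single_image(checkpoint, 'samples/street.jpg')           # hypothetical image
print('detections written to', result_file)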
Example #3
def visualize(args):
    model_path = args.model_path
    image_path = args.image_path
    use_gpu = args.use_gpu
    retinanet = torch.load(model_path)

    custom_labels = {"cobia"}
    label_map = {k: v + 1 for v, k in enumerate(custom_labels)}
    label_map["background"] = 0
    rev_label_map = {v: k for k, v in label_map.items()}  # Inverse mapping

    if use_gpu:
        retinanet = retinanet.cuda()

    unnormalize = UnNormalizer()
    retinanet.eval()

    with torch.no_grad():
        st = time.time()
        img = cv2.imread(image_path)
        img = img.astype(np.float32) / 255.0

        mean = np.array([[[0.485, 0.456, 0.406]]])
        std = np.array([[[0.229, 0.224, 0.225]]])
        img = (img.astype(np.float32) - mean) / std

        image_resizer = Resize_Img()
        img = image_resizer(img)["img"]

        img = np.expand_dims(img, axis=0)
        img = collat(img)
        img_tensor = img.cuda().float() if use_gpu else img.float()
        scores, classification, transformed_anchors = retinanet(img_tensor)
        print("Elapsed time: {}".format(time.time() - st))
        idxs = np.where(scores.cpu() > 0.5)
        img = np.array(255 * unnormalize(img[0, :, :, :])).copy()

        img[img < 0] = 0
        img[img > 255] = 255

        img = np.transpose(img, (1, 2, 0))

        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = rev_label_map[int(classification[idxs[0][j]])]
            draw_caption(img, (x1, y1, x2, y2), label_name)

            cv2.rectangle(img, (x1, y1), (x2, y2),
                          color=(0, 0, 255),
                          thickness=2)
            print(label_name, x1, y1, x2, y2)

        cv2.imwrite("out.png", img)
Example #4
    def __init__(self):
        self.model = None
        self.transform = transforms.Compose([Normalizer(), Resizer()])
        self.unnormalize = UnNormalizer()
        self.overlap_threshold = 0.6
        self.score_threshold = 0.5
        self.distance_threshold = 1.

        self.bboxes = None
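The snippet above shows only the constructor. A minimal sketch of how the stored score threshold might be applied when filtering raw detections; this helper is not part of the original class.

import torch

def filter_by_score(scores, boxes, score_threshold=0.5):
    """Keep only detections whose confidence exceeds the threshold."""
    keep = scores > score_threshold
    return scores[keep], boxes[keep]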
Example #5
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')


    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    mAP = csv_eval.evaluate(dataset_val, retinanet)

    print(mAP)
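If csv_eval.evaluate returns a per-class mapping of (average_precision, num_annotations) pairs, as in the pytorch-retinanet reference implementation, the printed result can be made more readable; the exact return format is an assumption.

def print_map(mAP, labels):
    for label_idx, (ap, num_annotations) in mAP.items():
        print('{}: AP={:.4f} ({} annotations)'.format(labels[label_idx], ap, num_annotations))

# e.g. print_map(mAP, dataset_val.labels)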
Example #6
def detect(checkpoint, pred_on_path, output_path, threshold=0.5, visualize=False, red_label='sick'):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')

    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)  # recreate (or create) the output directory
    logger.info('inside ' + str(pred_on_path) + ': ' + str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([Normalizer(), Resizer(min_side=608)])) #TODO make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)
    configs = deepcopy(checkpoint['training_configs'])
    configs.update(checkpoint['hp_values'])
    logger.info('initializing object_detection model')
    model = retinanet(depth=checkpoint['depth'], num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios']) #TODO: make depth an input parameter
    logger.info('loading weights')
    model.load_state_dict(checkpoint['model'])
    model = model.to(device=device)
    logger.info('model to device: ' + str(device))
    model.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
    inference_times = []
    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = model(data['img'].to(device=device).float())
            elapsed_time = time.time() - st
            print('Elapsed time: {}'.format(elapsed_time))
            inference_times.append(elapsed_time)
            idxs = np.where(scores.cpu() > threshold)[0]
            if visualize:
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

                img[img < 0] = 0
                img[img > 255] = 255

                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])

                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    if red_label in label_name:
                        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)
                    print(label_name)

                # undo the resize so coordinates match the original image (for eval against GT)
                bbox /= scale
                bbox = bbox.round()  # torch.Tensor.round() is not in-place
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    for i, x in enumerate(single_det_list):
                        f.write(str(x))
                        f.write(' ')
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)
                cv2.waitKey(0)
    print('average inference time per image: ', np.mean(inference_times))
    return output_path
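The text files written by detect hold one detection per line in the form: label score x1 y1 x2 y2. A small hypothetical reader for downstream evaluation:

def read_detections(txt_path):
    """Parse one of the detection files written above."""
    detections = []
    with open(txt_path, encoding='utf8') as f:
        for line in f:
            parts = line.split()
            if len(parts) != 6:
                continue
            label, score, x1, y1, x2, y2 = parts
            detections.append((label, float(score), int(x1), int(y1), int(x2), int(y2)))
    return detections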
Example #7
def main(args=None):
	parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_test', help='Path to file containing validation annotations (optional, see readme)')

	parser.add_argument('--model', help='Path to model (.pt) file.')

	parser = parser.parse_args(args)
	'''
	if parser.dataset == 'coco':
		dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))
	'''
	if parser.dataset == 'csv':
		dataset_test = CSVDataset(train_file=parser.csv_test, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), ValResizer()]), predict=True)
	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	#sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
	dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=0, collate_fn=collater)

	retinanet = torch.load(parser.model)

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()

	retinanet.eval()

	unnormalize = UnNormalizer()

	def draw_caption(image, box, caption):

		b = np.array(box).astype(int)
		cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
		cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)


	image_list = []
	x1_list = []
	width = []
	y1_list = []
	height = []
	label_list = []
	for idx, data in enumerate(dataloader_test):

		with torch.no_grad():
			st = time.time()
			scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
			#print(data['name'][0])
			if (idx+1)%100 == 0:
				print(idx+1)
			#print('Elapsed time: {}'.format(time.time()-st))
			idxs = np.where(scores>0.5)
			img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

			img[img<0] = 0
			img[img>255] = 255

			img = np.transpose(img, (1, 2, 0))

			img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

			for j in range(idxs[0].shape[0]):
				bbox = transformed_anchors[idxs[0][j], :]
				image_list += [data['name'][0][36:]]  # slice the string (strip the path prefix), not the list
				x1 = int(bbox[0])*2
				y1 = int(bbox[1])*2
				x2 = int(bbox[2])*2
				y2 = int(bbox[3])*2
				x1_list += [str(x1)]
				y1_list += [str(y1)]
				width += [str(x2-x1)]
				height += [str(y2-y1)]
				label_list += [1]
				label_name = dataset_test.labels[int(classification[idxs[0][j]])]
			if idxs[0].shape[0] == 0:
				image_list += [data['name'][0][36:]]  # slice the string (strip the path prefix), not the list
				x1_list += ['']
				y1_list += ['']
				width += ['']
				height += ['']
				label_list += [0]
		if (idx+1)%50 == 0:
			print(len(image_list), len(x1_list), len(y1_list), len(width), len(height), len(label_list))
	data = np.array([image_list])
	data = np.append(data, [x1_list], axis=0)
	data = np.append(data, [y1_list], axis=0)
	data = np.append(data, [width], axis=0)
	data = np.append(data, [height], axis=0)
	data = np.append(data, [label_list], axis=0)
	dataframe = pd.DataFrame(data = data.T)
	dataframe.to_csv("prediction.csv",index=False,sep=',')
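The CSV written above has no meaningful header (to_csv emits the default 0-5 column indices); its six columns are image name, x1, y1, width, height, and a label flag (1 = detection, 0 = none). A hypothetical reader that restores those names:

import pandas as pd

preds = pd.read_csv("prediction.csv",
                    names=["image", "x1", "y1", "width", "height", "label"],
                    skiprows=1)  # skip the default integer header row written by to_csv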
Example #8
import torch
import math
import time
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch.utils.model_zoo as model_zoo
import torch.nn as nn
from torch.nn import init
from utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from anchors import Anchors
import losses
from lib.nms.pth_nms import pth_nms

from dataloader import UnNormalizer
unnormalize = UnNormalizer()


def nms(dets, thresh):
    """Dispatch to either the CPU or GPU NMS implementation.
    Accepts dets as a tensor."""
    return pth_nms(dets, thresh)


class PyramidFeatures(nn.Module):
    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size,
                              feature_size,
Example #9
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--save_type',
                        help='Saved model type is state_dict or model file')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--folder',
                        help='Path to the evaluation images folder')
    parser.add_argument('--rec', type=bool)
    parser.add_argument('--video_file', help="Name of the file to be saved")

    parser = parser.parse_args(args)
    if parser.rec:
        assert parser.video_file is not None
    dataset_val = scotty_dataset(parser.folder,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    dataset_val_viz = scotty_dataset(parser.folder,
                                     transform=transforms.Compose([Resizer()]))
    dataloader_val = DataLoader(dataset_val, num_workers=1, shuffle=False)

    #retinanet = torch.load(parser.model)
    #retinanet = model.resnet18(num_classes=dataset_val.num_classes(),)
    #retinanet.load_state_dict(torch.load(parser.model))
    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    # initialize video writer object
    if parser.rec:
        print("..Recording Video..")
        record = save_video(parser.video_file)

    pbar = tqdm.tqdm(range(len(dataloader_val)))

    alpha = 0.55

    dummy_input = Variable(torch.rand(1, 3, 224, 224))

    with SummaryWriter(comment='resnet18') as w:
        model = torchvision.models.resnet18()
        w.add_graph(model, (dummy_input, ))


#-------------------------------------------------Inference / Visualization Loop----------------------------------------------------#
    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():

            torch.cuda.synchronize()
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].permute(0, 3, 1, 2).float().cuda())
            #print ("Image shape: {} ".format(data['img'].permute(0,3,1,2).shape))
            pbar.write('Elapsed time: {}'.format(time.time() - st))
            torch.cuda.synchronize()
            idxs = np.where(scores > 0.5)
            img = np.array(
                unnormalize(torch.squeeze(data["img"]).permute(2, 1,
                                                               0)).permute(
                                                                   2, 1, 0))
            img = cv2.cvtColor(img.astype(np.float32), cv2.COLOR_BGR2RGB)
            img = (255 * img).astype(np.uint8)
            img_dup = img.copy()

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                color = color_id(int(classification[idxs[0][j]]))
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color, -1)
                #print(label_name)

            pbar.update()
            pbar.set_description("Images Processed : {}/{}".format(
                idx, len(dataloader_val)))
            pbar.set_postfix("")

            cv2.addWeighted(img, alpha, img_dup, 1 - alpha, 0, img)

            if parser.rec:
                record.write(img)

            cv2.imshow('img', img)
            cv2.waitKey(0)

    if parser.rec:
        record.close()
Example #10
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--ROI_model', help='Path to ROI model (.pt) file.')
    parser.add_argument('--QRCode_model',
                        help="path to QRcode model(.pt) file")

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose([
                                     Normalizer(ROI_mean, ROI_std),
                                     Resizer()
                                 ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=None,
                                sampler=None)

    ROI_net = torch.load(parser.ROI_model)
    QRCode_net = torch.load(parser.QRCode_model)

    use_gpu = True

    if use_gpu:
        ROI_net = ROI_net.cuda()
        QRCode_net = QRCode_net.cuda(0)

    ROI_net.eval()
    QRCode_net.eval()

    unnormalize = UnNormalizer(ROI_mean, ROI_std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = ROI_net(
                data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # With batch_size=1 and batch_sampler/sampler left as None there is no shuffling,
            # so the sequential loop index matches get_image_name(idx); otherwise this lookup will fail.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ph, pw, _ = img.shape
                ret = convert_predict_to_origin_bbox(origin_img, pw, ph, x1,
                                                     y1, x2, y2)
                if ret is None:
                    print("ERROR: convert predicted origin bbox error")
                    continue

                x1p, y1p, x2p, y2p = ret
                print("ROI predicted:", x1p, y1p, x2p, y2p)
                output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) + ',' +
                                  str(x2p) + ',' + str(y2p) + ',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
                ROI = origin_img[y1p:y2p, x1p:x2p]
                cv2.rectangle(origin_img, (x1p, y1p), (x2p, y2p),
                              color=(0, 0, 255),
                              thickness=8)
                #import pdb
                #pdb.set_trace()
                ROI = ROI.astype(np.float32) / 255.0
                # normalize it
                ROI_normalized = (ROI - QRCode_mean) / QRCode_std
                #resize it
                rows, cols, cns = ROI_normalized.shape
                smallest_side = min(rows, cols)
                #rescale the image so the smallest side is min_side
                min_side = 600.0
                max_side = 900.0
                scale = min_side / smallest_side
                #check if the largest side is now greater than max_side, which can happen
                # when images have a large aspect ratio
                largest_side = max(rows, cols)
                if largest_side * scale > max_side:
                    scale = max_side / largest_side

                # resize the image with the computed scale
                ROI_scale = skimage.transform.resize(
                    ROI_normalized,
                    (int(round(rows * scale)), int(round((cols * scale)))))
                rows, cols, cns = ROI_scale.shape

                pad_w = 32 - rows % 32
                pad_h = 32 - cols % 32

                ROI_padded = np.zeros(
                    (rows + pad_w, cols + pad_h, cns)).astype(np.float32)
                ROI_padded[:rows, :cols, :] = ROI_scale.astype(np.float32)
                x = torch.from_numpy(ROI_padded)
                print('x.shape:', x.shape)
                x = torch.unsqueeze(x, dim=0)
                print('x.shape after unsqueeze:', x.shape)
                x = x.permute(0, 3, 1, 2)
                print('x.shape after permute:', x.shape)

                scores, classification, transformed_anchors = QRCode_net(
                    x.cuda().float())
                print('scores:', scores)
                print('classification:', classification)
                print('transformed_anchors:', transformed_anchors)
                idxs = np.where(scores.cpu() > 0.5)
                predict_height, predict_width, _ = ROI_padded.shape

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    print("!!QRCode predicted bbox inside ROI:", x1, y1, x2,
                          y2)

                    ret = convert_predict_to_origin_bbox(
                        ROI, predict_width, predict_height, x1, y1, x2, y2)
                    if ret is None:
                        continue

                    qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2 = ret
                    print('qrcode(bbox):', qrcode_x1, qrcode_y1, qrcode_x2,
                          qrcode_y2)

                    qrcode_img_x1 = x1p + qrcode_x1
                    qrcode_img_y1 = y1p + qrcode_y1
                    qrcode_img_x2 = x1p + qrcode_x2
                    qrcode_img_y2 = y1p + qrcode_y2
                    print('!!!QRCode in image:', qrcode_img_x1, qrcode_img_y1,
                          qrcode_img_x2, qrcode_img_y2)
                    cv2.rectangle(origin_img, (qrcode_img_x1, qrcode_img_y1),
                                  (qrcode_img_x2, qrcode_img_y2),
                                  color=(255, 0, 0),
                                  thickness=8)
                    cv2.imwrite('origin_img_qrcode.png', origin_img)
                    resized = cv2.resize(origin_img, (800, 600))
                    cv2.imshow('result', resized)
            else:
                not_processed_file.write(fn + ",,,,,\n")

            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q' == chr(key & 255):
                    exit(0)

    output_file.close()
    not_processed_file.close()
Example #11
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(mean, std), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    #sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=None, sampler=None)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer(mean, std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # With batch_size=1 and batch_sampler/sampler left as None there is no shuffling,
            # so the sequential loop index matches get_image_name(idx); otherwise this lookup will fail.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ret = convert_predict_to_origin_bbox(origin_img, img, x1, y1, x2, y2)
                if ret is None:
                    continue

                x1p, y1p, x2p, y2p = ret
                output_file.write(fn+','+str(x1p)+','+str(y1p)+','+str(x2p)+','+str(y2p)+',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
            else:
                not_processed_file.write(fn+",,,,,\n")

            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q'==chr(key & 255):
                    exit(0)

    output_file.close()
    not_processed_file.close()
Example #12
def get_transcript(image_id, data, retinanet, score_threshold, nms_threshold,
                   dataset_val, alphabet):
    image_name = image_id + '.jpg'
    retinanet.training = False
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])

    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']
        im = im.cuda().float()
        if retinanet.module.htr_gt_box:
            scores, classification, transformed_anchors, transcriptions = retinanet(
                [im, data['annot']])
        else:
            scores, classification, transformed_anchors, transcriptions = retinanet(
                im)
        idxs = np.where(scores.cpu() > score_threshold)
        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        #img = np.array(255 * unnormalize(im)).copy()

        img[img < 0] = 0
        img[img > 255] = 255

        img = np.transpose(img, (1, 2, 0))

        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height, conf)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height, conf)
        words = []
        transcriptions = np.argmax(transcriptions.cpu(), axis=-1)
        for j in range(idxs[0].shape[0]):

            # Initialize object for setting confidence values
            box = {}
            bbox = transformed_anchors[idxs[0][j], :]
            if idxs[0][j] >= transcriptions.shape[0]: continue
            transcription = transcriptions[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[idxs[0][j]])]

            # Add a text region to the Page
            word = pxml.addWord(line, "ID" + str(j))

            # Set text region bounding box with a confidence
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1, conf)

            #pxml.setCoordsBBox( reg,x1, y1, x2-x1, y2-y1, conf )
            #transcription = transcripts[j]
            transcription = labels_to_text(transcription, alphabet)

            # Set the text for the text region
            conf.assign(0.9)
            pxml.setTextEquiv(word, transcription, conf)

            # Add property to text region
            pxml.setProperty(word, "category", label_name)

            # Add a second page with a text region and specific id
            #page = pxml.addPage("example_image_2.jpg", 300, 300)
            #reg = pxml.addTextRegion( page, "regA" )
            #pxml.setCoordsBBox( reg, 15, 12, 76, 128 )
            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(
            words, fake_baseline=True, max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine')
        group_idx = 0
        idx_in_group = 0
        transcript_pred = []
        for n in order:
            word_idx = order.index(n)
            if idx_in_group >= groups[group_idx]:
                group_idx += 1
                idx_in_group = 0
            transcript_pred.append(pxml.getTextEquiv(words[n]))
            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1
        image_text = image_id + '.txt'
        # Write XML to file
        return " ".join(transcript_pred)
Example #13
def infer(img_dir,classes_csv,model_fname,resnet_depth,score_thresh,out_dir, results_fname):

    # Create dataset
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for folder in img_dir:
        for fname in os.listdir(folder):
            if fname.endswith(".png"):
                img_list.append(os.path.join(folder, fname))

    dataset_val = CustomDataset(img_list=img_list, class_list=classes_csv, transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    print(dataset_val.num_classes())

    # Create the model
    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes())
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes())
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes())
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes())
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    state_dict = torch.load(model_fname)
    from collections import OrderedDict

    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    # load params
    retinanet.load_state_dict(new_state_dict)

    use_gpu = True  # this example assumes a CUDA device (inputs are moved with .cuda() below)
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    results = []

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}, Num objects: {}'.format(time.time() - st, len(scores)))

            idxs = np.where(scores > score_thresh)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0)).astype(np.uint8).copy()

            bboxes = []
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0] / data['scale'][0])
                y1 = int(bbox[1] / data['scale'][0])
                x2 = int(bbox[2] / data['scale'][0])
                y2 = int(bbox[3] / data['scale'][0])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

                score = float(scores[idxs[0][j]])

                bboxes.append([x1, y1, x2, y2, score])

            img_fname = ntpath.basename(data['img_fname'][0])
            results.append([img_fname, bboxes])
    #         fig, ax = plt.subplots(figsize=(12, 12))
    #         ax.imshow(img, interpolation='bilinear')

    with open(os.path.join(out_dir, results_fname), "wb") as output_file:
        pickle.dump(results, output_file)
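The pickle written by infer stores a list of [image_filename, [[x1, y1, x2, y2, score], ...]] entries; a hypothetical loader for later analysis:

import pickle

def load_results(pickle_path):
    with open(pickle_path, 'rb') as f:
        return pickle.load(f)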
Example #14
def generate_pagexml(image_id, data, retinanet, score_threshold, dataset_val,
                     nms_threshold):
    image_name = image_id + '.jpg'
    file = 'pagexmls/' + image_name
    alphabet = " abcdefghijklmnopqrstuvwxy z"

    colors = get_n_random_colors(len(dataset_val.labels))
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])

    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']

        im = im.cuda().float()
        scores, classification, transformed_anchors = retinanet(
            [im, nms_threshold])
        print('Elapsed time: {}'.format(time.time() - st))
        idxs = np.where(scores > score_threshold)
        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        #img = np.array(255 * unnormalize(im)).copy()

        img[img < 0] = 0
        img[img > 255] = 255

        img = np.transpose(img, (1, 2, 0))

        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]
        cv2.imwrite(file, img)

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height)
        words = []
        for k in range(len(dataset_val.labels)):
            cv2.putText(img, dataset_val.labels[k], (25, 25 + k * 15),
                        cv2.FONT_HERSHEY_PLAIN, 1, colors[k], 2)

        for j in range(idxs[0].shape[0]):

            # Initialize object for setting confidence values
            box = {}
            bbox = transformed_anchors[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[idxs[0][j]])]

            cv2.rectangle(img, (x1, y1), (x2, y2),
                          color=colors[int(classification[idxs[0][j]])],
                          thickness=2)

            # Add a text region to the Page
            word = pxml.addWord(line, "ID" + str(j))

            # Set text region bounding box with a confidence
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1)

            #pxml.setCoordsBBox( reg,x1, y1, x2-x1, y2-y1, conf )

            transcripts = []
            confs = []
            seq_len = int(bbox[4])
            for k in range(seq_len + 1):
                transcripts.append(
                    np.argmax(bbox[(5 + k * 27):((5 + (k + 1) * 27))]))
            transcripts = np.array(transcripts)
            transcript = labels_to_text(transcripts, alphabet)
            draw_caption(
                img, (x1, y1, x2, y2), "".join([
                    alphabet[transcripts[k]] for k in range(len(transcripts))
                ]))

            # Set the text for the text region
            conf.assign(1)
            pxml.setTextEquiv(
                word, "".join([
                    alphabet[transcripts[k]] for k in range(len(transcripts))
                ]))

            # Add property to text region
            pxml.setProperty(word, "category", label_name)

            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(
            words, fake_baseline=True, max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine', 0)
        group_idx = 0
        idx_in_group = 0
        #line= pxml.addTextLine(reg,"ID"+str(group_idx+1))
        for n in order:
            word_idx = order.index(n)

            if idx_in_group >= groups[group_idx]:
                #line = pxml.selectNth('//_:TextLine',group_idx,reg)
                #line= pxml.selectNth(reg)
                group_idx += 1
                idx_in_group = 0

            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1

        # Write XML to file
        pxml.write('pagexmls/' + gtxml_name + ".xml")
        cv2.imwrite(str(image_id) + '.jpg', img)
Example #15
def generate_pagexml(image_id, data, retinanet, score_threshold, nms_threshold,
                     dataset_val):
    image_name = image_id + '.jpg'
    im_file_out = 'pagexmls/' + image_name
    alphabet = retinanet.alphabet
    #retinanet.score_threshold = torch.tensor(score_threshold).cuda().float()
    colors = get_n_random_colors(len(dataset_val.labels))
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])

    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']

        im = im.cuda().float()
        print(retinanet.htr_gt_box)
        if retinanet.htr_gt_box:
            scores, classification, transformed_anchors, transcriptions = retinanet(
                [im, data['annot']])
            score_threshold = 0
        else:
            scores, classification, transformed_anchors, transcriptions = retinanet(
                im)

        n_boxes_predicted = transformed_anchors.shape[0]
        print(n_boxes_predicted, "BOXES PREDICTED")

        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

        img[img < 0] = 0
        img[img > 255] = 255

        img = np.transpose(img, (1, 2, 0))

        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]
        cv2.imwrite(im_file_out, img)

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height, conf)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height, conf)
        words = []
        for k in range(len(dataset_val.labels)):
            cv2.putText(img, dataset_val.labels[k], (25, 25 + k * 15),
                        cv2.FONT_HERSHEY_PLAIN, 1, colors[k], 2)
        transcriptions = np.argmax(transcriptions.cpu(), axis=-1)
        for box_id in range(n_boxes_predicted):

            # Initialize object for setting confidence values
            box = {}
            bbox = transformed_anchors[box_id, :]
            transcription = transcriptions[box_id, :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[box_id])]

            cv2.rectangle(img, (x1, y1), (x2, y2),
                          color=colors[int(classification[box_id])],
                          thickness=2)

            # Add a text region to the Page
            word = pxml.addWord(line, "ID" + str(box_id))

            # Set text region bounding box with a confidence
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1, conf)

            #pxml.setCoordsBBox( reg,x1, y1, x2-x1, y2-y1, conf )
            #transcription = transcripts[j]
            transcription = labels_to_text(transcription, alphabet)
            draw_caption(img, (x1, y1, x2, y2), transcription)

            # Set the text for the text region
            conf.assign(0.9)
            pxml.setTextEquiv(word, transcription, conf)

            # Add property to text region
            pxml.setProperty(word, "category", label_name)

            # Add a second page with a text region and specific id
            #page = pxml.addPage("example_image_2.jpg", 300, 300)
            #reg = pxml.addTextRegion( page, "regA" )
            #pxml.setCoordsBBox( reg, 15, 12, 76, 128 )
            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(
            words, fake_baseline=True, max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine')
        group_idx = 0
        idx_in_group = 0
        for n in order:
            word_idx = order.index(n)
            if idx_in_group >= groups[group_idx]:
                group_idx += 1
                idx_in_group = 0

            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1

        # Write XML to file
        pxml.write('pagexmls/' + gtxml_name + ".xml")
        cv2.imwrite(os.path.join('pred_sample_ims',
                                 str(image_id) + '.jpg'), img)
Example #16
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--csv_train')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    cnt = 1
    f = open("./output_files/output_retinanet_INbreast.txt", 'w')
    for idx, data in enumerate(dataloader_val):
        #print("data")
        #print(data)
        #print(data.keys())
        print("SCale")
        print(data['scale'])
        scale = data['scale'][0]
        with torch.no_grad():
            name = dataset_val.names[idx]
            arr = name.split("/")
            filename = arr[len(arr) - 1]
            #print("NAMEE", filename)
            temp = data['filename'][0]
            temparr = temp.split("/")
            resname = temparr[len(temparr) - 1]
            print(resname)
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].cuda().float())
            #print('Elapsed time: {}'.format(time.time()-st))
            idxs = np.where(scores >= 0.001)
            #print("idxs")
            #print(idxs)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            scores = scores.cpu().numpy()

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                score = scores[idxs[0][j]]  # use the index of this detection, not the loop counter
                print(score)
                f.write(resname + "," + str(x1 / scale) + "," +
                        str(y1 / scale) + "," + str(x2 / scale) + "," +
                        str(y2 / scale) + "," + str(score) + "," +
                        label_name + "\n")
                #name = dataset_val.image_names[idx]
                #arr = name.split("/")
                #filename = arr[len(arr)-1]
                #print("NAMEE", filename)

                print(x1, y1, x2, y2)
                if (score >= 0.32):
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2),
                                  color=(0, 0, 255),
                                  thickness=2)
                #print(label_name)

            #cv2.imshow('img', img)
            #filename = findfilename(img)
            #print("FILENAMEEEEEEEEEEEEEEEEEEEE")
            #temp = data['filename'][0]
            #temparr = temp.split("/")
            #resname = temparr[len(temparr)-1]
            cv2.imwrite('./results/' + resname, img)
            cnt += 1
            #break
            #cv2.waitKey(0)
            #cv2.destroyAllWindows()

    f.close()
Example #17
def visualize(csv_val, csv_classes, model):

    dataset = "csv"

    if dataset == 'csv':
        dataset_val = CSVDataset(train_file=csv_val,
                                 class_list=csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 180, 0), 1)

    def draw_caption_original(image, box, caption):

        b = np.array(box).astype(int)
        #print("b", b)
        cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 180), 1)  #B

    kaggle_ouput = []
    for idx, data in enumerate(dataloader_val):
        print(idx)
        kaggle_row = []
        with torch.no_grad():
            st = time.time()
            #print("data shape:", data['img'].shape)
            scores, classification, transformed_anchors = retinanet(
                data['img'].cuda().float())
            #print('Elapsed time: {}'.format(time.time()-st))
            idxs = np.where(scores > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            print('Scores', scores)
            #print("name", data['name'])

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            kaggle_row.append(get_filename(data['name'][0]))
            row = ''
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), "Predicted opacity")

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 255, 0),
                              thickness=2)
                # append "confidence x y width height" for this detection;
                # index scores with idxs[0][j] so the confidence matches the box
                score = round(scores[idxs[0][j]].item(), 2)
                entry = '{} {} {} {} {}'.format(score, x1, y1, x2 - x1, y2 - y1)
                row = entry if j == 0 else row + ' ' + entry

            # draw the ground-truth boxes; annotation rows padded with -1 are skipped
            for ann in data['annot']:
                for annotation in ann:
                    if annotation[0] != -1:
                        gx1, gy1 = int(annotation[0]), int(annotation[1])
                        gx2, gy2 = int(annotation[2]), int(annotation[3])
                        draw_caption_original(img, (gx1, gy1, gx2, gy2),
                                              "Real opacity")
                        cv2.rectangle(img, (gx1, gy1), (gx2, gy2),
                                      color=(0, 0, 255),
                                      thickness=2)

            cv2.imshow('img', img)
            kaggle_row.append(row)
            #print(kaggle_row)
            #print(idxs)
            kaggle_output.append(kaggle_row)
            cv2.waitKey(0)

    import pandas as pd
    pd.DataFrame(kaggle_output, columns=[
        'patientId', 'PredictionString'
    ]).to_csv("/home/jdmaestre/PycharmProjects/test_kaggle.csv")
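
# A minimal usage sketch for visualize() (the CSV and model paths below are
# hypothetical placeholders, not files referenced elsewhere in this example):
# visualize(csv_val='annotations_val.csv',
#           csv_classes='classes.csv',
#           model='retinanet_final.pt')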
Exemplo n.º 18
0
def bbox_extraction(file_list='./data/images2.csv'):
    weights_path = './models/csv_retinanet_25.pt'
    csv_classes = './classes.csv'

    dataset_val = CSVDataset(train_file=file_list,
                             class_list=csv_classes,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))
    # dataset_val = CSVDataset(train_file=file_list, class_list= csv_classes, transform=transforms.Compose([Normalizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=False)
    retinanet.load_state_dict(torch.load(weights_path))

    # fall back to CPU when CUDA is unavailable so that `device` is always defined
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    retinanet = retinanet.to(device)

    retinanet.eval()

    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(
                data['img'].to(device).float())

            def get_bbox(classification, transformed_anchors, label=0):
                # take the first detection returned for this label
                # (raises IndexError if the label was not detected at all)
                bbox = {}
                idx = np.where(classification == label)[0][0]
                co_ord = transformed_anchors[idx, :]
                bbox['x1'] = int(co_ord[0])
                bbox['y1'] = int(co_ord[1])
                bbox['x2'] = int(co_ord[2])
                bbox['y2'] = int(co_ord[3])

                return bbox

            scores = scores.cpu().numpy()
            classification = classification.cpu().numpy()
            transformed_anchors = transformed_anchors.cpu().numpy()
            # print('scores:',scores)
            # print('classification:', classification)
            # print('transformed_anchors', transformed_anchors)
            bbox = {}
            bbox['neck'] = get_bbox(classification,
                                    transformed_anchors,
                                    label=0)
            bbox['stomach'] = get_bbox(classification,
                                       transformed_anchors,
                                       label=1)

            # print('neck',bbox['neck'] )
            # print('stomach',bbox['stomach'] )

            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            cv2.rectangle(img, (bbox['neck']['x1'], bbox['neck']['y1']),
                          (bbox['neck']['x2'], bbox['neck']['y2']),
                          color=(0, 0, 255),
                          thickness=2)
            cv2.rectangle(img, (bbox['stomach']['x1'], bbox['stomach']['y1']),
                          (bbox['stomach']['x2'], bbox['stomach']['y2']),
                          color=(0, 0, 255),
                          thickness=2)

            # cv2.imshow('img', img)
            # cv2.imwrite('./sample_11.jpg',img)
            # cv2.waitKey(0)

            # returning inside the loop means only the first image in the loader is processed
            return bbox


# bbox_extraction()

# if __name__ == '__main__':
#  main()
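
# A minimal usage sketch for bbox_extraction(); the returned dict has the
# structure built above (default paths come from the function signature):
# boxes = bbox_extraction(file_list='./data/images2.csv')
# neck = boxes['neck']        # {'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
# stomach = boxes['stomach']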
Exemplo n.º 19
0
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
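
# Typical invocation of this script from the command line (the script name is
# a hypothetical placeholder; the flags match the argparse definitions above):
# python visualize.py --dataset csv --csv_classes classes.csv \
#     --csv_val annotations_val.csv --model model_final.pt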
Exemplo n.º 20
0
def predict(folder_path, model_path):

    dataset_val = datasets.ImageFolder(folder_path,  transform=transforms.Compose([Normalizer_only_image(), Resizer_only_img()]))
    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, collate_fn=collater_image_only, num_workers=1, batch_size=1)
    print(dataloader_val.dataset)

    retinanet = torch.load(model_path)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        print("idx", idx)
        with torch.no_grad():
            st = time.time()
            # reshape the single batch element to NCHW; the hard-coded 640x640
            # assumes Resizer_only_img resizes/pads images to that size
            dataa = data[0]
            dataa = dataa.view(1, 3, 640, 640)
            scores, classification, transformed_anchors = retinanet(dataa.cuda().float())
            #print('Elapsed time: {}'.format(time.time()-st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(dataa[0, :, :, :])).copy()

            #print("Classification", classification)
            #print("Scores", scores)
            #print("transformed_anchors", transformed_anchors)
            # print("Shape", img.shape)


            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            # optional grayscale preview (left disabled):
            # import matplotlib.pyplot as plt
            # plt.imshow(img, cmap='gray')
            # plt.show()

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                #label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), 'Opacity')

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                #print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
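
# A minimal usage sketch for predict() (both paths are hypothetical placeholders):
# predict(folder_path='./test_images/', model_path='./models/retinanet_final.pt')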