示例#1
0
def main(args=None):
    """Run two-stage RetinaNet detection (ROI, then QR code) on a validation set.

    For every image yielded by the validation loader the ROI network is run
    and detections scoring above 0.5 are drawn on the un-normalized network
    input. When exactly one ROI is detected, its box is mapped back onto the
    image read from disk, written as a CSV row to ``output_file``, cropped,
    normalized, rescaled and padded, and fed to the QR-code network. QR-code
    boxes are drawn on the original image, which is saved to
    ``origin_img_qrcode.png`` and shown. Images with any other number of ROI
    detections are recorded in ``not_processed_file``.

    NOTE(review): this function relies on names that are not defined inside
    it and are not visible in this excerpt -- ``output_file``,
    ``not_processed_file``, ``debug``, ``ROI_mean``, ``ROI_std``,
    ``QRCode_mean``, ``QRCode_std`` and ``convert_predict_to_origin_bbox``.
    They are presumably module-level globals/helpers; confirm before reuse.

    Args:
        args: Optional list of command-line strings handed to argparse.
            ``None`` lets argparse read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is neither ``'coco'`` nor ``'csv'``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--ROI_model', help='Path to ROI model (.pt) file.')
    parser.add_argument('--QRCode_model',
                        help="path to QRcode model(.pt) file")

    # Keep the parsed namespace under its own name instead of rebinding
    # `parser`, so the parser and its result are not confused.
    opts = parser.parse_args(args)

    if opts.dataset == 'coco':
        dataset_val = CocoDataset(opts.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif opts.dataset == 'csv':
        dataset_val = CSVDataset(train_file=opts.csv_val,
                                 class_list=opts.csv_classes,
                                 transform=transforms.Compose([
                                     Normalizer(ROI_mean, ROI_std),
                                     Resizer()
                                 ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # No sampler / batch_sampler: the loader then walks the dataset in
    # sequential order one sample at a time, which is what makes
    # `dataset_val.get_image_name(idx)` below line up with the loop index.
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=None,
                                sampler=None)

    # NOTE(review): torch.load unpickles the file; only load trusted models.
    ROI_net = torch.load(opts.ROI_model)
    QRCode_net = torch.load(opts.QRCode_model)

    use_gpu = True

    if use_gpu:
        ROI_net = ROI_net.cuda()
        QRCode_net = QRCode_net.cuda(0)

    ROI_net.eval()
    QRCode_net.eval()

    unnormalize = UnNormalizer(ROI_mean, ROI_std)

    # Detections at or below this score are ignored in both stages.
    score_threshold = 0.5
    # Target sizes for rescaling the ROI crop before the QR-code network.
    min_side = 600.0
    max_side = 900.0

    def draw_caption(image, box, caption):
        """Draw `caption` just above the top-left corner of `box`."""
        b = np.array(box).astype(int)
        # Thick black text first, thin white text on top: an outlined label.
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    try:
        for idx, data in enumerate(dataloader_val):
            with torch.no_grad():
                st = time.time()
                scores, classification, transformed_anchors = ROI_net(
                    data['img'].cuda().float())
                print('Elapsed time: {}'.format(time.time() - st))
                # Valid only because the loader is sequential (see above);
                # with shuffling or a custom sampler this lookup would name
                # the wrong file.
                fn = dataset_val.get_image_name(idx)
                print('fn of image:', fn)
                idxs = np.where(scores.cpu() > score_threshold)

                # Rebuild a displayable image from the normalized tensor.
                img = np.array(
                    255 * unnormalize(data['img'][0, :, :, :])).copy()
                img[img < 0] = 0
                img[img > 255] = 255
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
                print("image shape when drawcaption:", img.shape)

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    label_name = dataset_val.labels[int(
                        classification[idxs[0][j]])]
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2),
                                  color=(0, 0, 255),
                                  thickness=2)

                if idxs[0].shape[0] == 1:
                    # Exactly one ROI: x1, y1, x2, y2 still hold its box from
                    # the drawing loop above.
                    origin_img = cv2.imread(fn)
                    if origin_img is None:
                        # cv2.imread signals an unreadable file with None.
                        print("ERROR: cannot read image file:", fn)
                        continue

                    ph, pw, _ = img.shape
                    ret = convert_predict_to_origin_bbox(
                        origin_img, pw, ph, x1, y1, x2, y2)
                    if ret is None:
                        print("ERROR: convert predicted origin bbox error")
                        continue

                    x1p, y1p, x2p, y2p = ret
                    print("ROI predicted:", x1p, y1p, x2p, y2p)
                    output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) +
                                      ',' + str(x2p) + ',' + str(y2p) +
                                      ',ROI\n')
                    print("!!!! FN {} saved!!!".format(fn))

                    # Copy the crop: a plain slice is a view into origin_img,
                    # so the rectangle drawn below would otherwise bleed into
                    # the pixels fed to the QR-code network.
                    ROI = origin_img[y1p:y2p, x1p:x2p].copy()
                    if ROI.size == 0:
                        # A degenerate box would divide by zero when the
                        # scale is computed.
                        print("ERROR: empty ROI crop for image:", fn)
                        continue

                    cv2.rectangle(origin_img, (x1p, y1p), (x2p, y2p),
                                  color=(0, 0, 255),
                                  thickness=8)

                    ROI = ROI.astype(np.float32) / 255.0
                    ROI_normalized = (ROI - QRCode_mean) / QRCode_std

                    # Rescale so the smallest side becomes min_side, unless
                    # that pushes the largest side past max_side (large
                    # aspect ratios); then fit the largest side instead.
                    rows, cols, cns = ROI_normalized.shape
                    smallest_side = min(rows, cols)
                    largest_side = max(rows, cols)
                    scale = min_side / smallest_side
                    if largest_side * scale > max_side:
                        scale = max_side / largest_side

                    ROI_scale = skimage.transform.resize(
                        ROI_normalized,
                        (int(round(rows * scale)), int(round(cols * scale))))
                    rows, cols, cns = ROI_scale.shape

                    # Zero-pad both sides up to a multiple of 32. A side that
                    # is already a multiple of 32 still gains a full 32, as
                    # in the original code.
                    pad_rows = 32 - rows % 32
                    pad_cols = 32 - cols % 32
                    ROI_padded = np.zeros(
                        (rows + pad_rows, cols + pad_cols,
                         cns)).astype(np.float32)
                    ROI_padded[:rows, :cols, :] = ROI_scale.astype(np.float32)

                    # HWC array -> 1 x C x H x W tensor.
                    x = torch.from_numpy(ROI_padded)
                    print('x.shape:', x.shape)
                    x = torch.unsqueeze(x, dim=0)
                    print('x.shape after unsqueeze:', x.shape)
                    x = x.permute(0, 3, 1, 2)
                    print('x.shape after permute:', x.shape)

                    scores, classification, transformed_anchors = QRCode_net(
                        x.cuda().float())
                    print('scores:', scores)
                    print('classification;', classification)
                    print('transformed_anchors:', transformed_anchors)
                    idxs = np.where(scores.cpu() > score_threshold)
                    predict_height, predict_width, _ = ROI_padded.shape

                    for j in range(idxs[0].shape[0]):
                        bbox = transformed_anchors[idxs[0][j], :]
                        x1 = int(bbox[0])
                        y1 = int(bbox[1])
                        x2 = int(bbox[2])
                        y2 = int(bbox[3])
                        print("!!QRCode predicted bbox inside ROI:", x1, y1,
                              x2, y2)

                        ret = convert_predict_to_origin_bbox(
                            ROI, predict_width, predict_height, x1, y1, x2,
                            y2)
                        if ret is None:
                            continue

                        qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2 = ret
                        print('qrcode(bbox):', qrcode_x1, qrcode_y1,
                              qrcode_x2, qrcode_y2)

                        # Shift from ROI-local to full-image coordinates.
                        qrcode_img_x1 = x1p + qrcode_x1
                        qrcode_img_y1 = y1p + qrcode_y1
                        qrcode_img_x2 = x1p + qrcode_x2
                        qrcode_img_y2 = y1p + qrcode_y2
                        print('!!!QRCode in image:', qrcode_img_x1,
                              qrcode_img_y1, qrcode_img_x2, qrcode_img_y2)
                        cv2.rectangle(origin_img,
                                      (qrcode_img_x1, qrcode_img_y1),
                                      (qrcode_img_x2, qrcode_img_y2),
                                      color=(255, 0, 0),
                                      thickness=8)
                        cv2.imwrite('origin_img_qrcode.png', origin_img)
                        resized = cv2.resize(origin_img, (800, 600))
                        cv2.imshow('result', resized)
                else:
                    not_processed_file.write(fn + ",,,,,\n")

                if debug:
                    cv2.imshow('img', img)
                    cv2.setWindowTitle('img', fn)
                    key = cv2.waitKey(0)
                    if 'q' == chr(key & 255):
                        # Same effect as exit(0) without depending on the
                        # `site` module; the finally block still runs.
                        raise SystemExit(0)
    finally:
        # Close both result files even when processing fails or the user
        # quits, so rows already written are not left in an open handle.
        try:
            output_file.close()
        finally:
            not_processed_file.close()
示例#2
0
def main(args=None):
    """Run a RetinaNet ROI detector on a validation set and record its boxes.

    For every image yielded by the validation loader the network is run and
    detections scoring above 0.5 are drawn on the un-normalized network
    input. When exactly one detection is found, its box is mapped back onto
    the image read from disk and written as a CSV row to ``output_file``;
    otherwise the file name is recorded in ``not_processed_file``.

    NOTE(review): this function relies on names that are not defined inside
    it and are not visible in this excerpt -- ``output_file``,
    ``not_processed_file``, ``debug``, ``mean``, ``std`` and
    ``convert_predict_to_origin_bbox``. They are presumably module-level
    globals/helpers; confirm before reuse.

    Args:
        args: Optional list of command-line strings handed to argparse.
            ``None`` lets argparse read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is neither ``'coco'`` nor ``'csv'``.
    """
    parser = argparse.ArgumentParser(description='Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    # Keep the parsed namespace under its own name instead of rebinding
    # `parser`, so the parser and its result are not confused.
    opts = parser.parse_args(args)

    if opts.dataset == 'coco':
        dataset_val = CocoDataset(opts.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif opts.dataset == 'csv':
        dataset_val = CSVDataset(train_file=opts.csv_val, class_list=opts.csv_classes,
                                 transform=transforms.Compose([Normalizer(mean, std), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # Deliberately no sampler / batch_sampler (e.g. an aspect-ratio based
    # one): the loader then walks the dataset in sequential order one sample
    # at a time, which is what makes `dataset_val.get_image_name(idx)` below
    # line up with the loop index.
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=None, sampler=None)

    # NOTE(review): torch.load unpickles the file; only load trusted models.
    retinanet = torch.load(opts.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer(mean, std)

    # Detections at or below this score are ignored.
    score_threshold = 0.5

    def draw_caption(image, box, caption):
        """Draw `caption` just above the top-left corner of `box`."""
        b = np.array(box).astype(int)
        # Thick black text first, thin white text on top: an outlined label.
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    try:
        for idx, data in enumerate(dataloader_val):
            with torch.no_grad():
                st = time.time()
                scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
                print('Elapsed time: {}'.format(time.time() - st))
                # Valid only because the loader is sequential (see above);
                # with shuffling or a custom sampler this lookup would name
                # the wrong file.
                fn = dataset_val.get_image_name(idx)
                print('fn of image:', fn)
                idxs = np.where(scores.cpu() > score_threshold)

                # Rebuild a displayable image from the normalized tensor.
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
                img[img < 0] = 0
                img[img > 255] = 255
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
                print("image shape when drawcaption:", img.shape)

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

                if idxs[0].shape[0] == 1:
                    # Exactly one detection: x1, y1, x2, y2 still hold its
                    # box from the drawing loop above.
                    origin_img = cv2.imread(fn)
                    if origin_img is None:
                        # cv2.imread signals an unreadable file with None.
                        print("ERROR: cannot read image file:", fn)
                        continue

                    ret = convert_predict_to_origin_bbox(origin_img, img, x1, y1, x2, y2)
                    if ret is None:
                        # Report the failure instead of skipping silently.
                        print("ERROR: convert predicted origin bbox error")
                        continue

                    x1p, y1p, x2p, y2p = ret
                    output_file.write(fn+','+str(x1p)+','+str(y1p)+','+str(x2p)+','+str(y2p)+',ROI\n')
                    print("!!!! FN {} saved!!!".format(fn))
                else:
                    not_processed_file.write(fn+",,,,,\n")

                if debug:
                    cv2.imshow('img', img)
                    cv2.setWindowTitle('img', fn)
                    key = cv2.waitKey(0)
                    if 'q' == chr(key & 255):
                        # Same effect as exit(0) without depending on the
                        # `site` module; the finally block still runs.
                        raise SystemExit(0)
    finally:
        # Close both result files even when processing fails or the user
        # quits, so rows already written are not left in an open handle.
        try:
            output_file.close()
        finally:
            not_processed_file.close()