示例#1
0
def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    test_set = CocoDataset(opt.data_path, 2017, "val",
                           SSDTransformer(dboxes, (300, 300), val=True))
    encoder = Encoder(dboxes)

    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    for img, img_id, img_size, _, _ in test_set:
        if img is None:
            continue
        if torch.cuda.is_available():
            img = img.cuda()
        with torch.no_grad():
            ploc, plabel = model(img.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold,
                                          20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                path = test_set.coco.loadImgs(img_id)[0]["file_name"]
                output_img = cv2.imread(
                    os.path.join(opt.data_path, "val2017", path))
                height, width, _ = output_img.shape
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = test_set.label_info[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax),
                                  color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr,
                                                cv2.FONT_HERSHEY_PLAIN, 1,
                                                1)[0]
                    cv2.rectangle(
                        output_img, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_img, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                    cv2.imwrite(
                        "{}/{}_prediction.jpg".format(opt.output, path[:-4]),
                        output_img)
示例#2
0
def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    img = Image.open(opt.input).convert("RGB")
    img, _, _, _ = transformer(img, None, torch.zeros(1,4), torch.zeros(1))
    encoder = Encoder(dboxes)

    if torch.cuda.is_available():
        img = img.cuda()
    with torch.no_grad():
        ploc, plabel = model(img.unsqueeze(dim=0))
        result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
        loc, label, prob = [r.cpu().numpy() for r in result]
        best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
        loc = loc[best]
        label = label[best]
        prob = prob[best]
        output_img = cv2.imread(opt.input)
        if len(loc) > 0:
            height, width, _ = output_img.shape
            loc[:, 0::2] *= width
            loc[:, 1::2] *= height
            loc = loc.astype(np.int32)
            for box, lb, pr in zip(loc, label, prob):
                category = coco_classes[lb]
                color = colors[lb]
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                cv2.rectangle(output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color,
                              -1)
                cv2.putText(
                    output_img, category + " : %.2f" % pr,
                    (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1,
                    (255, 255, 255), 1)
        if opt.output is None:
            output = "{}_prediction.jpg".format(opt.input[:-4])
        else:
            output = opt.output
        cv2.imwrite(output, output_img)
示例#3
0
        draw.rectangle(xy=[l + 1. for l in box_location], outline=box_color)

        # Text (class label)
        text_size = font.getsize(box_label_name.upper())
        text_location = [box_location[0] + 2., box_location[1] - text_size[1]]
        textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4.,box_location[1]]
        draw.rectangle(xy=textbox_location, fill=box_color)
        draw.text(xy=text_location, text=box_label_name.upper(), fill='white')
    
    annotated_image.save(args.image_save_path+'p_'+args.test_image.split('/')[-1])
    del draw

if __name__ == '__main__':
    args = get_infer_argument()
    set_cuda_dev(args.ngpu)
    print('Arguments for inference : ', args)

    # Load model checkpoint
    model = SSD('test', args)
    checkpoint = args.trained_model # '*.pth'
    _, model, _ = load_checkpoint(model, args.trained_model_path+checkpoint)
    model = model.cuda()
    model.eval()
    
    with torch.no_grad():
        img_example = args.test_image # With absolute path (ex. /media/dataset/VOC2007/JPEGImages/000001.jpg)
        original_image = Image.open(img_example, mode='r')
        original_image = original_image.convert('RGB')
        detect_image(original_image, args)
        print('Detect image finished!')
示例#4
0
def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    cap = cv2.VideoCapture(opt.input)
    if opt.output is None:
        output = "{}_prediction.mp4".format(opt.input[:-4])
    else:
        output = opt.output
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"),
                          int(cap.get(cv2.CAP_PROP_FPS)), (width, height))
    encoder = Encoder(dboxes)
    while cap.isOpened():
        flag, frame = cap.read()
        output_frame = np.copy(frame)
        if flag:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            break
        frame = Image.fromarray(frame)
        frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4),
                                     torch.zeros(1))

        if torch.cuda.is_available():
            frame = frame.cuda()
        with torch.no_grad():
            ploc, plabel = model(frame.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold,
                                          20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = coco_classes[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax),
                                  color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr,
                                                cv2.FONT_HERSHEY_PLAIN, 1,
                                                1)[0]
                    cv2.rectangle(
                        output_frame, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_frame, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

        out.write(output_frame)
    cap.release()
    out.release()