# Evaluate a trained SSD on the COCO 2017 validation split and write annotated
# images to opt.output. SSD, ResNet, generate_dboxes, Encoder, SSDTransformer,
# CocoDataset and colors come from the repository's own modules.
import os
import shutil

import cv2
import numpy as np
import torch


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    test_set = CocoDataset(opt.data_path, 2017, "val", SSDTransformer(dboxes, (300, 300), val=True))
    encoder = Encoder(dboxes)

    # Start from a clean output folder.
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    for img, img_id, img_size, _, _ in test_set:
        if img is None:
            continue
        if torch.cuda.is_available():
            img = img.cuda()
        with torch.no_grad():
            # Raw predictions, then NMS; keep at most 20 detections per image.
            ploc, plabel = model(img.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                path = test_set.coco.loadImgs(img_id)[0]["file_name"]
                output_img = cv2.imread(os.path.join(opt.data_path, "val2017", path))
                height, width, _ = output_img.shape
                # Boxes are normalized to [0, 1]; scale back to pixel coordinates.
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = test_set.label_info[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    cv2.rectangle(output_img, (xmin, ymin),
                                  (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                    cv2.putText(output_img, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                cv2.imwrite("{}/{}_prediction.jpg".format(opt.output, path[:-4]), output_img)
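# --- Hedged sketch, not part of the repo: every script here reads its settings
# from an `opt` namespace built by the repository's argument parser, which is
# not shown. The parser below is an assumption covering only the opt.* fields
# these scripts actually use; flag names and defaults are guesses. ---
import argparse


def get_args():
    parser = argparse.ArgumentParser("SSD inference")
    parser.add_argument("--pretrained-model", type=str, required=True,
                        help="checkpoint containing a model_state_dict entry")
    parser.add_argument("--data-path", type=str, default="coco",
                        help="COCO root folder (dataset script only)")
    parser.add_argument("--input", type=str, default=None,
                        help="input image or video (single-input scripts)")
    parser.add_argument("--output", type=str, default=None,
                        help="output folder (dataset script) or output file")
    parser.add_argument("--cls-threshold", type=float, default=0.3)
    parser.add_argument("--nms-threshold", type=float, default=0.5)
    return parser.parse_args()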
# Run the same model on a single image given by opt.input. coco_classes and
# colors come from the repository's utility module.
import cv2
import numpy as np
import torch
from PIL import Image


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    img = Image.open(opt.input).convert("RGB")
    # The transformer expects boxes/labels even at inference time; pass dummies.
    img, _, _, _ = transformer(img, None, torch.zeros(1, 4), torch.zeros(1))
    encoder = Encoder(dboxes)

    if torch.cuda.is_available():
        img = img.cuda()
    with torch.no_grad():
        ploc, plabel = model(img.unsqueeze(dim=0))
        result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
        loc, label, prob = [r.cpu().numpy() for r in result]
        best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
        loc = loc[best]
        label = label[best]
        prob = prob[best]
        output_img = cv2.imread(opt.input)
        if len(loc) > 0:
            height, width, _ = output_img.shape
            # Scale normalized boxes back to pixel coordinates.
            loc[:, 0::2] *= width
            loc[:, 1::2] *= height
            loc = loc.astype(np.int32)
            for box, lb, pr in zip(loc, label, prob):
                category = coco_classes[lb]
                color = colors[lb]
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                cv2.rectangle(output_img, (xmin, ymin),
                              (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                cv2.putText(output_img, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
        if opt.output is None:
            output = "{}_prediction.jpg".format(opt.input[:-4])
        else:
            output = opt.output
        cv2.imwrite(output, output_img)
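# --- Hedged sketch, not part of the repo: the single-input scripts index
# `coco_classes` and `colors` by predicted label. Both tables live in the
# repository's utilities; the stand-in below is an assumption (evenly spaced
# hues), and `coco_classes` must match the ordering the model was trained with. ---
import colorsys


def make_colors(num_classes=81):  # 80 COCO classes + background
    return [tuple(int(255 * c) for c in colorsys.hsv_to_rgb(i / num_classes, 1.0, 1.0))
            for i in range(num_classes)]


colors = make_colors()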
# Tail of detect_image() from a VOC-style SSD implementation: draw each box and
# a filled label strip above it, then save the annotated image. torch and
# PIL.Image are imported at the top of the full script; SSD, load_checkpoint,
# detect_image, get_infer_argument and set_cuda_dev are repo-local helpers.
        draw.rectangle(xy=[l + 1. for l in box_location], outline=box_color)
        # Text (class label). Note: font.getsize was removed in Pillow 10;
        # newer versions use font.getbbox instead.
        text_size = font.getsize(box_label_name.upper())
        text_location = [box_location[0] + 2., box_location[1] - text_size[1]]
        textbox_location = [box_location[0], box_location[1] - text_size[1],
                            box_location[0] + text_size[0] + 4., box_location[1]]
        draw.rectangle(xy=textbox_location, fill=box_color)
        draw.text(xy=text_location, text=box_label_name.upper(), fill='white')
    annotated_image.save(args.image_save_path + 'p_' + args.test_image.split('/')[-1])
    del draw


if __name__ == '__main__':
    args = get_infer_argument()
    set_cuda_dev(args.ngpu)
    print('Arguments for inference : ', args)

    # Load model checkpoint
    model = SSD('test', args)
    checkpoint = args.trained_model  # '*.pth'
    _, model, _ = load_checkpoint(model, args.trained_model_path + checkpoint)
    model = model.cuda()
    model.eval()

    with torch.no_grad():
        img_example = args.test_image  # absolute path (e.g. /media/dataset/VOC2007/JPEGImages/000001.jpg)
        original_image = Image.open(img_example, mode='r')
        original_image = original_image.convert('RGB')
        detect_image(original_image, args)
    print('Detect image finished!')
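# --- Hedged sketch, not part of the repo: the script above unpacks
# `load_checkpoint` as a 3-tuple with the model in the middle. The repo's own
# helper is not shown; this version assumes the checkpoint stores the epoch,
# model weights, and optimizer state under these key names (all assumptions). ---
import torch


def load_checkpoint(model, path):
    # Hypothetical layout: {"epoch": int, "model": state_dict, "optimizer": state_dict}
    checkpoint = torch.load(path, map_location="cpu")
    model.load_state_dict(checkpoint["model"])
    return checkpoint.get("epoch", 0), model, checkpoint.get("optimizer")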
# Run the model frame by frame over a video given by opt.input and write an
# annotated copy. Repo-local helpers are the same as in the scripts above.
import cv2
import numpy as np
import torch
from PIL import Image


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)

    cap = cv2.VideoCapture(opt.input)
    if opt.output is None:
        output = "{}_prediction.mp4".format(opt.input[:-4])
    else:
        output = opt.output
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"),
                          int(cap.get(cv2.CAP_PROP_FPS)), (width, height))
    encoder = Encoder(dboxes)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        # Keep an untouched BGR copy for drawing; feed an RGB copy to the model.
        output_frame = np.copy(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4), torch.zeros(1))
        if torch.cuda.is_available():
            frame = frame.cuda()
        with torch.no_grad():
            ploc, plabel = model(frame.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = coco_classes[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax), color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    cv2.rectangle(output_frame, (xmin, ymin),
                                  (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                    cv2.putText(output_frame, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
        out.write(output_frame)
    cap.release()
    out.release()
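# --- One caveat, offered as a hedged alternative rather than a fix: the writer
# above pairs the MJPG fourcc with a default .mp4 extension, and depending on
# the local OpenCV/ffmpeg build that combination can produce an unplayable or
# empty file. If that happens, mp4v with .mp4 (or MJPG with .avi) is a safer
# pairing, and it is worth guarding against streams that report 0 FPS: ---
# fps = cap.get(cv2.CAP_PROP_FPS) or 30  # some streams report 0 fps
# out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"mp4v"), int(fps), (width, height))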