def __init__(self, data_dir, img_size, confthre, nmsthre, vis=False):
    """
    Args:
        data_dir (str): dataset root directory.
        img_size (int): image size after preprocessing. Images are resized
            to squares whose shape is (img_size, img_size).
        confthre (float): confidence threshold ranging from 0 to 1,
            which is defined in the config file.
        nmsthre (float): IoU threshold for non-maximum suppression,
            ranging from 0 to 1.
    """
    test_sets = [
        ('ENHANCE1', 'test'),
    ]
    self.dataset = SWIMDetection(
        root=data_dir,
        image_sets=test_sets,
        input_dim=img_size,
        # ImageNet mean/std normalization, matching the training transform
        preproc=ValTransform(rgb_means=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225)),
    )
    self.num_images = len(self.dataset)
    self.dataloader = torch.utils.data.DataLoader(
        self.dataset, batch_size=1, shuffle=False, num_workers=0)
    self.img_size = img_size
    self.confthre = confthre
    self.nmsthre = nmsthre
    self.vis = vis
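# A minimal sketch (an assumption, not this repo's ValTransform) of what the
# preprocessing above amounts to: resize to a square, normalize with the
# ImageNet mean/std passed in, and convert HWC uint8 -> CHW float tensor.
import cv2
import numpy as np
import torch


def val_transform_sketch(img_bgr, size, means=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)):
    """Hypothetical stand-in for ValTransform(img, None, (size, size))."""
    img = cv2.resize(img_bgr, (size, size)).astype(np.float32) / 255.0
    img = img[:, :, ::-1]                             # BGR -> RGB
    img = (img - np.array(means)) / np.array(std)     # per-channel normalize
    return torch.from_numpy(img.transpose(2, 0, 1).copy()).float()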
def demo():
    model = build_model()
    if os.path.isdir(data_f):
        all_imgs = glob.glob(os.path.join(data_f, '*.jpg'))
        for img in all_imgs:
            print('~~~~~ predict on img: {}'.format(img))
            im = cv2.imread(img)
            ori_im = im.copy()
            height, width, _ = im.shape
            transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225))
            im_input, _ = transform(im, None, target_size)
            im_input = im_input.to(device).unsqueeze(0)
            with torch.no_grad():
                out = model(im_input)
            outputs = postprocess(out, num_classes, 0.01, 0.65)
            if outputs[0] is None:
                # no detections above threshold; skip this image
                continue
            outputs = outputs[0].cpu().data
            bboxes = outputs[:, 0:4]
            # rescale boxes from network input size back to the original image
            bboxes[:, 0::2] *= width / target_size[0]
            bboxes[:, 1::2] *= height / target_size[1]
            cls = outputs[:, 6]
            # final score = objectness * class confidence
            scores = outputs[:, 4] * outputs[:, 5]
            if isinstance(bboxes, torch.Tensor):
                bboxes = bboxes.cpu().numpy()
            res = visualize_det_cv2_part(
                im, scores, cls, bboxes, coco_label_map_list[1:], 0.1)
            cv2.imshow('rr', res)
            cv2.waitKey(0)
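# The two in-place multiplications above implement the usual rescaling trick:
# x-coordinates (columns 0 and 2) scale by the width ratio, y-coordinates
# (columns 1 and 3) by the height ratio. A standalone sketch (the helper name
# is hypothetical, not from this repo):
import torch


def rescale_boxes(bboxes, orig_w, orig_h, in_w, in_h):
    """Map (x1, y1, x2, y2) boxes from network input size to image size."""
    out = bboxes.clone()
    out[:, 0::2] *= orig_w / in_w   # x1, x2
    out[:, 1::2] *= orig_h / in_h   # y1, y2
    return out


# e.g. a box predicted on a 416x416 input, original image is 1280x720:
boxes = torch.tensor([[104.0, 52.0, 208.0, 156.0]])
print(rescale_boxes(boxes, 1280, 720, 416, 416))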
def demo():
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)
    print("successfully loaded config file: ", cfg)

    backbone = cfg['MODEL']['BACKBONE']
    test_size = (args.test_size, args.test_size)

    if args.dataset == 'COCO':
        class_names = COCO_CLASSES
        num_class = 80
    elif args.dataset == 'VOC':
        class_names = VOC_CLASSES
        num_class = 20
    else:
        raise Exception("Only support COCO or VOC model now!")

    onnx_model = onnx.load('weights/yolov3_asff.onnx')
    onnx.checker.check_model(onnx_model)
    print('onnx model checked.')

    # NOTE: the original left the forward pass commented out, so `outputs`
    # was undefined when postprocess() ran. Running the checked model through
    # onnxruntime here is an assumption about the intended inference path.
    sess = onnxruntime.InferenceSession('weights/yolov3_asff.onnx')
    input_name = sess.get_inputs()[0].name

    # load img
    if os.path.isdir(args.img):
        all_imgs = glob.glob(os.path.join(args.img, '*.jpg'))
        for img in all_imgs:
            print('~~~~~ predict on img: {}'.format(img))
            transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225))
            im = cv2.imread(img)
            height, width, _ = im.shape
            ori_im = im.copy()
            im_input, _ = transform(im, None, test_size)

            tic = time.time()
            ort_out = sess.run(None, {input_name: im_input.unsqueeze(0).numpy()})
            outputs = torch.from_numpy(ort_out[0])
            print('cost: {}'.format(time.time() - tic))

            outputs = postprocess(outputs, num_class, 0.01, 0.65)
            outputs = outputs[0].cpu().data
            bboxes = outputs[:, 0:4]
            bboxes[:, 0::2] *= width / test_size[0]
            bboxes[:, 1::2] *= height / test_size[1]
            #bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
            #bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
            cls = outputs[:, 6]
            scores = outputs[:, 4] * outputs[:, 5]
            pred_im = vis(ori_im, bboxes.numpy(), scores.numpy(), cls.numpy(),
                          conf=0.6, class_names=class_names)
            cv2.imshow('Detection', pred_im)
            cv2.waitKey(0)
    elif 'mp4' in args.img:
        cam = cv2.VideoCapture(args.img)
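# How a file like 'weights/yolov3_asff.onnx' could have been produced
# (a sketch under assumptions: the input size, opset, and tensor names are
# guesses, and a trivial Conv2d stands in for the real YOLOv3 so the snippet
# runs standalone):
import torch

model = torch.nn.Conv2d(3, 255, 1)            # stand-in for the real model
dummy = torch.randn(1, 3, 416, 416)           # NCHW input at test_size
torch.onnx.export(model, dummy, 'yolov3_asff_sketch.onnx',
                  input_names=['images'], output_names=['predictions'],
                  opset_version=11)

# After export, onnx.checker.check_model() (as in the demo above) verifies
# the graph is well-formed before handing it to a runtime.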
def __init__(self, data_dir, img_size, confthre, nmsthre, testset=False,
             voc=False, vis=False, classes=COCO_CLASSES):
    """
    Args:
        data_dir (str): dataset root directory.
        img_size (int): image size after preprocessing. Images are resized
            to squares whose shape is (img_size, img_size).
        confthre (float): confidence threshold ranging from 0 to 1,
            which is defined in the config file.
        nmsthre (float): IoU threshold for non-maximum suppression,
            ranging from 0 to 1.
    """
    json_f = 'instances_val2017.json'
    name = 'val2017'
    if testset:
        json_f = 'image_info_test-dev2017.json'
        name = 'test2017'
    if voc:
        json_f = 'pascal_test2007.json'
    self.testset = testset
    self.dataset = COCODataset(data_dir=data_dir,
                               img_size=img_size,
                               json_file=json_f,
                               preproc=ValTransform(
                                   rgb_means=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225)),
                               name=name,
                               voc=voc,
                               classes=classes)
    self.num_classes = len(classes)
    self.num_images = len(self.dataset)
    self.dataloader = torch.utils.data.DataLoader(
        self.dataset, batch_size=1, shuffle=False, num_workers=0)
    self.img_size = img_size
    self.confthre = confthre
    self.nmsthre = nmsthre
    self.voc = voc
    self.vis = vis
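# The confthre/nmsthre pair stored above is what postprocess() consumes in
# all of these demos. A standalone sketch of that filtering step (using
# torchvision's NMS, which is an assumption -- this repo's postprocess()
# implements its own logic):
import torch
from torchvision.ops import nms


def filter_detections(boxes, scores, confthre=0.01, nmsthre=0.65):
    """Drop low-confidence boxes, then suppress overlaps above IoU nmsthre."""
    keep = scores > confthre
    boxes, scores = boxes[keep], scores[keep]
    kept = nms(boxes, scores, nmsthre)      # indices of surviving boxes
    return boxes[kept], scores[kept]


boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(filter_detections(boxes, scores))     # overlapping second box removed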
def detect(self, img_pth, img):
    # load img
    transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    #im = cv2.imread(args.img)
    im = cv2.imread(os.path.join(img_pth, img))
    height, width, _ = im.shape
    ori_im = im.copy()
    im_input, _ = transform(im, None, self.test_size)
    if self.cfg['MODEL']['USE_CUDA']:
        im_input = im_input.to(self.device)
    im_input = Variable(im_input.type(self.dtype).unsqueeze(0))

    outputs = self.model(im_input)                  # xc, yc, w, h
    outputs = postprocess(outputs, self.num_class, 0.1, 0.65)
    outputs = outputs[0].cpu().data

    bboxes = outputs[:, 0:4]                        # x1, y1, x2, y2
    bboxes[:, 0::2] *= width / self.test_size[0]    # rescale x
    bboxes[:, 1::2] *= height / self.test_size[1]   # rescale y
    #bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]     # w
    #bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]     # h
    cls = outputs[:, 6]
    scores = outputs[:, 4] * outputs[:, 5]
    pred_im = vis(ori_im, bboxes.numpy(), scores.numpy(), cls.numpy(),
                  conf=0.6, class_names=self.class_names)
    cv2.imshow('Detection', pred_im)
    cv2.imwrite(os.path.join(self.cfg['TEST']['SAVED'], img), pred_im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return bboxes.numpy(), cls.numpy(), scores.numpy()
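# The two commented-out lines in detect() convert corner format
# (x1, y1, x2, y2) to (x1, y1, w, h) -- the format COCO annotations use.
# A standalone sketch of both directions (function names are hypothetical):
import numpy as np


def xyxy_to_xywh(b):
    """(x1, y1, x2, y2) -> (x1, y1, w, h)."""
    out = b.copy()
    out[:, 2] = b[:, 2] - b[:, 0]
    out[:, 3] = b[:, 3] - b[:, 1]
    return out


def xywh_to_xyxy(b):
    """(x1, y1, w, h) -> (x1, y1, x2, y2)."""
    out = b.copy()
    out[:, 2] = b[:, 0] + b[:, 2]
    out[:, 3] = b[:, 1] + b[:, 3]
    return out


b = np.array([[10., 20., 50., 80.]])
assert np.allclose(xywh_to_xyxy(xyxy_to_xywh(b)), b)   # round trip is exact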
def demo(): """ YOLOv3 demo. See README for details. """ args = parse_args() print("Setting Arguments.. : ", args) cuda = torch.cuda.is_available() and args.use_cuda # Parse config settings with open(args.cfg, 'r') as f: cfg = yaml.safe_load(f) print("successfully loaded config file: ", cfg) backbone=cfg['MODEL']['BACKBONE'] test_size = (args.test_size,args.test_size) if args.dataset == 'COCO': class_names = COCO_CLASSES num_class=80 elif args.dataset == 'VOC': class_names = VOC_CLASSES num_class=20 else: raise Exception("Only support COCO or VOC model now!") # Initiate model if args.asff: if backbone == 'mobile': from models.yolov3_mobilev2 import YOLOv3 print("For mobilenet, we currently don't support dropblock, rfb and FeatureAdaption") else: from models.yolov3_asff import YOLOv3 print('Training YOLOv3 with ASFF!') model = YOLOv3(num_classes = num_class, rfb=args.rfb, asff=args.asff) else: if backbone == 'mobile': from models.yolov3_mobilev2 import YOLOv3 else: from models.yolov3_baseline import YOLOv3 print('Training YOLOv3 strong baseline!') model = YOLOv3(num_classes = num_class, rfb=args.rfb) if args.checkpoint: print("loading pytorch ckpt...", args.checkpoint) cpu_device = torch.device("cpu") ckpt = torch.load(args.checkpoint, map_location=cpu_device) model.load_state_dict(ckpt,strict=False) #model.load_state_dict(ckpt) if cuda: print("using cuda") torch.backends.cudnn.benchmark = True device = torch.device("cuda") model = model.to(device) if args.half: model = model.half() model = model.eval() dtype = torch.float16 if args.half else torch.float32 #load img transform = ValTransform(rgb_means=(0.485, 0.456, 0.406), std=(0.229,0.224,0.225)) im = cv2.imread(args.img) height, width, _ = im.shape ori_im = im.copy() im_input, _ = transform(im, None, test_size) if cuda: im_input = im_input.to(device) im_input = Variable(im_input.type(dtype).unsqueeze(0)) outputs= model(im_input) outputs = postprocess(outputs, num_class, 0.01, 0.65) outputs = outputs[0].cpu().data bboxes = outputs[:, 0:4] bboxes[:, 0::2] *= width / test_size[0] bboxes[:, 1::2] *= height / test_size[1] bboxes[:, 2] = bboxes[:,2] - bboxes[:,0] bboxes[:, 3] = bboxes[:,3] - bboxes[:,1] cls = outputs[:, 6] scores = outputs[:, 4]* outputs[:,5] pred_im=vis(ori_im, bboxes.numpy(), scores.numpy(), cls.numpy(), conf=0.6, class_names=class_names) cv2.imshow('Detection', pred_im) cv2.waitKey(0) cv2.destroyAllWindows() sys.exit(0)
def demo(args, video_pth, video_name):
    """
    YOLOv3 demo. See README for details.
    """
    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)
    #print("successfully loaded config file: ", cfg)

    backbone = cfg['MODEL']['BACKBONE']
    test_size = (args.test_size, args.test_size)

    # note: these must be elif branches; with independent `if`s the final
    # `else` raised for COCO and VOC as well
    if args.dataset == 'COCO':
        class_names = COCO_CLASSES
        num_class = 80
    elif args.dataset == 'VOC':
        class_names = VOC_CLASSES
        num_class = 20
    elif args.dataset == 'SWIM':
        class_names = SWIM_CLASSES
        num_class = 1
    else:
        raise Exception("Only support COCO, VOC and SWIM models now!")

    # Initiate model
    if args.asff:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
            #print("For mobilenet, we currently don't support dropblock, rfb and FeatureAdaption")
        else:
            from models.yolov3_asff import YOLOv3
            #print('Training YOLOv3 with ASFF!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb, asff=args.asff)
    else:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
        else:
            from models.yolov3_baseline import YOLOv3
            #print('Training YOLOv3 strong baseline!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb)

    if args.checkpoint:
        #print("loading pytorch ckpt...", args.checkpoint)
        cpu_device = torch.device("cpu")
        ckpt = torch.load(args.checkpoint, map_location=cpu_device)
        #model.load_state_dict(ckpt, strict=False)
        model.load_state_dict(ckpt)

    # `cuda` was referenced but never defined in this function; define it the
    # same way the other demos do
    cuda = torch.cuda.is_available() and args.use_cuda
    if cuda:
        torch.backends.cudnn.benchmark = True
        device = torch.device("cuda")
        model = model.to(device)

    if args.half:
        model = model.half()
    model = model.eval()
    dtype = torch.float16 if args.half else torch.float32

    # load video
    transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    cap = cv2.VideoCapture(video_pth)
    fps = cap.get(cv2.CAP_PROP_FPS)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    vid_writer = cv2.VideoWriter(
        os.path.join(args.save_path, video_name + '.avi'),
        fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            ori_frame = frame.copy()
            frame_input, _ = transform(frame, None, test_size)
            if cuda:
                frame_input = frame_input.to(device)
            frame_input = Variable(frame_input.type(dtype).unsqueeze(0))
            outputs = model(frame_input)
            outputs = postprocess(outputs, num_class, 0.01, 0.65)
            # postprocess returns a per-image list with None entries when a
            # frame has no detections
            if outputs[0] is not None:
                outputs = outputs[0].cpu().data
                bboxes = outputs[:, 0:4]
                bboxes[:, 0::2] *= width / test_size[0]
                bboxes[:, 1::2] *= height / test_size[1]
                #bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
                #bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
                cls = outputs[:, 6]
                scores = outputs[:, 4] * outputs[:, 5]
                pred_frame = vis(ori_frame, bboxes.numpy(), scores.numpy(),
                                 cls.numpy(), conf=0.6, class_names=class_names)
            else:
                # no detections on this frame; show it unannotated
                pred_frame = ori_frame
            cv2.namedWindow("Detection", 0)
            cv2.resizeWindow("Detection", 720, 640)  # was "enhanced": wrong window name
            cv2.imshow('Detection', pred_frame)
            vid_writer.write(pred_frame)
        else:
            break
        key = cv2.waitKey(1)
        if key == ord("q"):
            break

    cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
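# A small standalone sketch of the writer setup used above: MJPG into an
# .avi container, with an isOpened() check so a bad codec or path fails
# loudly instead of silently producing an empty file (paths here are
# placeholders):
import cv2

cap = cv2.VideoCapture('input.mp4')
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0      # some streams report 0 fps
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter('out.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                         fps, (w, h))
assert writer.isOpened(), 'VideoWriter failed to open (codec/path?)'

while True:
    ret, frame = cap.read()
    if not ret:
        break
    writer.write(frame)                      # frame size must match (w, h)

cap.release()
writer.release()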