class VideoDetectionLoader:
    def __init__(self, path, batchSize=4, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cpu()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole video
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cpu()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cpu()

                prediction = self.det_model(img, CUDA=False)
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                             opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
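VideoDetectionLoader is a threaded producer: start() spawns a daemon thread running update(), which reads frames in batches, runs detection, and pushes (inp, orig_img, boxes, scores) tuples into a bounded Queue, while the caller drains it with read(). The toy class below is a self-contained sketch of just that handshake, with a dummy frame source standing in for cv2.VideoCapture and the YOLO model; the name DummyFrameLoader is illustrative and not part of the original code.

import time
from queue import Queue
from threading import Thread

class DummyFrameLoader:
    """Thread + bounded-queue skeleton, as used by VideoDetectionLoader."""
    def __init__(self, num_frames=8, queueSize=4):
        self.num_frames = num_frames
        self.Q = Queue(maxsize=queueSize)

    def start(self):
        t = Thread(target=self.update, args=())
        t.daemon = True              # thread dies with the main process
        t.start()
        return self

    def update(self):
        for i in range(self.num_frames):
            while self.Q.full():     # back-pressure: wait instead of dropping frames
                time.sleep(0.2)
            # the real loader puts (inp, orig_img, boxes, scores) here
            self.Q.put(('frame-%d' % i, None, None, None))

    def read(self):
        return self.Q.get()

loader = DummyFrameLoader().start()
for _ in range(loader.num_frames):
    frame, orig, boxes, scores = loader.read()
    print('consumed', frame)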
fvs = WebcamLoader(webcam).start()
(fourcc, fps, frameSize) = fvs.videoinfo()

# Data writer
save_path = os.path.join(args.outputpath, 'AlphaPose_webcam' + webcam + '.avi')
writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'),
                    fps, frameSize).start()

# Load YOLO model
print('Loading YOLO model..')
sys.stdout.flush()
det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
det_model.load_weights('models/yolo/yolov3-spp.weights')
det_model.net_info['height'] = args.inp_dim
det_inp_dim = int(det_model.net_info['height'])
assert det_inp_dim % 32 == 0
assert det_inp_dim > 32
det_model.cpu()
det_model.eval()

# Load pose model
pose_dataset = Mscoco()
if args.fast_inference:
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cpu()
pose_model.eval()

runtime_profile = {
    'ld': [],
    'dt': [],
    'dn': [],
}
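The runtime_profile dict collects per-stage timing lists, but the excerpt cuts off before showing how they are filled. The fragment below is only a hypothetical illustration of how such lists are commonly populated and summarized; the timed() helper and the sleep() stand-ins are not from the original script, and the key names are taken verbatim from the snippet without assuming their exact meaning.

import time
import numpy as np

runtime_profile = {'ld': [], 'dt': [], 'dn': []}

def timed(key, fn, *args, **kwargs):
    # run fn, record its wall-clock time under runtime_profile[key], return its result
    t0 = time.time()
    out = fn(*args, **kwargs)
    runtime_profile[key].append(time.time() - t0)
    return out

# stand-ins for the real pipeline stages
timed('ld', time.sleep, 0.01)
timed('dt', time.sleep, 0.02)
timed('dn', time.sleep, 0.005)

print(' | '.join('%s: %.3fs' % (k, np.mean(v)) for k, v in runtime_profile.items() if v))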
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(
            "/home/a/roborts_project/src/alpha_pose/src/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights(
            '/home/a/roborts_project/src/alpha_pose/src/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cpu()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cpu()
                prediction = self.det_model(img, CUDA=False)
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                             opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
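Unlike VideoDetectionLoader, this variant can run update() either in a thread (single-process mode, opt.sp) or in a separate process, pairing queue.Queue with Thread and mp.Queue with mp.Process. The sketch below isolates that switch with a trivial worker; it uses the standard multiprocessing module rather than torch.multiprocessing, and the function names update/run are illustrative only.

from queue import Queue
from threading import Thread
import multiprocessing as mp

def update(Q, items=4):
    # a real loader would push (orig_img, im_name, boxes, scores, inps, pt1, pt2) tuples
    for i in range(items):
        Q.put(i)
    Q.put(None)  # sentinel, mirroring the all-None tuple pushed when the data runs out

def run(sp):
    # single-process flag: thread + queue.Queue, otherwise process + mp.Queue,
    # mirroring the opt.sp switch in DetectionLoader.__init__ / start
    Q = Queue(maxsize=8) if sp else mp.Queue(maxsize=8)
    worker = Thread(target=update, args=(Q,)) if sp else mp.Process(target=update, args=(Q,))
    worker.daemon = True
    worker.start()
    while True:
        item = Q.get()
        if item is None:
            break
        print('sp=%s got %s' % (sp, item))

if __name__ == '__main__':
    run(sp=True)    # threaded variant
    run(sp=False)   # multiprocessing variant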
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        if opt.device == 'GPU':
            self.det_model.cuda()
        else:
            self.det_model.cpu()
        self.det_model.eval()

        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the list used to store frames read from
        # the video file
        self.Q = list()

    def start(self):
        # start to detect person
        self.update()

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.Q[i]
            with torch.no_grad():
                # Human Detection
                if opt.device == 'GPU':
                    img = img.cuda()
                else:
                    img = img.cpu()
                prediction = self.det_model(img, CUDA=True if opt.device == 'GPU' else False)
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                             opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        self.Q.append((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    self.Q.append((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                # multiply the score with the bounding box height, then keep only
                # the single highest-scoring detection for this frame
                processed_scores = self.cal_scores(scores, boxes_k)
                best = np.argmax(processed_scores)
                self.Q.append((orig_img[k], im_name[k],
                               boxes_k[best:best + 1],
                               scores[best],
                               inps[best:best + 1],
                               pt1[best:best + 1],
                               pt2[best:best + 1]))

    def cal_scores(self, scores, boxes):
        # re-weight each detection confidence by the height |y1 - y2| of its box
        processed_scores = scores.clone()
        for i in range(boxes.shape[0]):
            processed_scores[i][0] *= abs(boxes[i][1] - boxes[i][3])
        return processed_scores

    def len(self):
        # return list len
        return len(self.Q)
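This third variant keeps only one person per frame: cal_scores multiplies each detection confidence by the height of its box, and only the box with the largest product is appended to the output list, which biases the choice toward the tallest (typically nearest, full-body) detection even when a smaller box has a slightly higher raw confidence. Below is a self-contained check of that selection rule with made-up boxes and confidences.

import torch

def cal_scores(scores, boxes):
    # re-weight each confidence by its box height, as in DetectionLoader.cal_scores
    processed = scores.clone()
    for i in range(boxes.shape[0]):
        processed[i][0] *= abs(boxes[i][1] - boxes[i][3])
    return processed

# three hypothetical detections: (x1, y1, x2, y2) and their confidences
boxes = torch.tensor([[10., 20., 60., 120.],     # height 100
                      [200., 30., 260., 80.],    # height 50
                      [300., 10., 340., 200.]])  # height 190
scores = torch.tensor([[0.90], [0.95], [0.60]])

processed = cal_scores(scores, boxes)
best = torch.argmax(processed).item()
print(processed.squeeze(1))            # confidence * height: 90.0, 47.5, 114.0
print('kept detection index:', best)   # 2 -> the tall, reasonably confident box wins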