def update(self):
    while True:
        sys.stdout.flush()
        print("detection processor len : " + str(self.Q.qsize()))
        # keep looping the whole dataset
        for i in range(self.datalen):
            with torch.no_grad():
                (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.read()
                if orig_img is None:
                    self.Q.put((None, None, None, None, None, None, None))
                    return
                if boxes is None or boxes.nelement() == 0:
                    # while self.Q.full():
                    #     time.sleep(0.2)
                    self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                    continue
                inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
                # while self.Q.full():
                #     time.sleep(0.2)
                self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))
def update(self):
    # keep looping the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            # Step 1: human detection
            (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.read()
            if orig_img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return
            if boxes is None or boxes.nelement() == 0:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                continue
            if self.output_cropimg:
                inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            else:
                inps, pt1, pt2 = None, None, None
            while self.Q.full():
                time.sleep(0.2)
            # print('---------------------------------')
            # print(boxes.shape, scores.shape, pt1.shape, pt2.shape, orig_img.shape)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))
def update(self):
    # keep looping the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2, CAR) = self.detectionLoader.read()
            # print('detection processor', im_name, boxes)
            if orig_img is None:
                self.Q.put((None, None, None, None, None, None, None, None))
                return
            # if boxes is None or boxes.nelement() == 0:
            if boxes is None:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None, CAR))
                continue
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            # print('detection processor', pt1, pt2)
            while self.Q.full():
                time.sleep(0.2)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2, CAR))
def human_detect(self):
    img, orig_img, im_dim_list = self.img, self.orig_img, self.im_dim_list
    if img is None:
        self.human_detect_result = (None, None, None, None, None, None)
        return

    with torch.no_grad():
        # Human Detection
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            # no detections
            self.human_detect_result = (None, orig_img[0], None, None, None, None)
            return
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor

        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]
        if isinstance(boxes, int) or boxes.shape[0] == 0:
            self.human_detect_result = (None, orig_img[0], boxes, scores, None, None)
            return

        inps = torch.zeros(boxes.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes.size(0), 2)
        pt2 = torch.zeros(boxes.size(0), 2)
        inp = im_to_torch(cv2.cvtColor(orig_img[0], cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
        self.human_detect_result = (inps, orig_img[0], boxes, scores, pt1, pt2)
        return
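# --- Hedged sketch (added for illustration, not part of the original code) ---
# The "coordinate transfer" block above undoes the YOLO letterbox resize: detections are
# produced in the square det_inp_dim x det_inp_dim input, so the padding is subtracted and
# the scale divided out to get boxes in original-image pixels. Assuming dets rows are
# [batch_idx, x1, y1, x2, y2, score, ...] and im_dim_list rows are [w, h, w, h], the same
# math as a standalone helper (the name is hypothetical) looks like:
import torch

def letterbox_to_original(dets, im_dim_list, det_inp_dim):
    """Map box corners from the padded square detector input back to image coordinates."""
    dims = torch.index_select(im_dim_list, 0, dets[:, 0].long())
    scaling_factor = torch.min(det_inp_dim / dims, 1)[0].view(-1, 1)
    # remove the symmetric padding added along each axis
    dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * dims[:, 0].view(-1, 1)) / 2
    dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * dims[:, 1].view(-1, 1)) / 2
    # undo the resize, then clip boxes to the image
    dets[:, 1:5] /= scaling_factor
    for j in range(dets.shape[0]):
        dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, dims[j, 0])
        dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, dims[j, 1])
    return dets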
def process(self, orig_img, im_name, boxes, scores, inps, pt1, pt2):
    with torch.no_grad():
        if orig_img is None:
            return None, None, None, None, None, None, None
        if boxes is None or boxes.nelement() == 0:
            return None, orig_img, im_name, boxes, scores, None, None
        inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
        return inps, orig_img, im_name, boxes, scores, pt1, pt2
def detect_image(self, im_path, outputdir):
    with torch.no_grad():
        ori_im, im_name, boxes, scores, inps, pt1, pt2 = \
            self.detection_loader.detect_image(im_path)
        if ori_im is None:
            return (None, None, None, None, None, None, None)
        if boxes is None or boxes.nelement() == 0:
            return (None, ori_im, im_name, boxes, scores, None, None)
        inp = im_to_torch(cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)
        return (inps, ori_im, im_name, boxes, scores, pt1, pt2)
def update(self):
    # keep looping the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.Q[i]
            if boxes is None or boxes.nelement() == 0:
                self.Q.append((None, orig_img, im_name, boxes, scores, None, None))
                continue
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            self.Q.append((inps, orig_img, im_name, boxes, scores, pt1, pt2))
def update(self):
    # keep looping the whole dataset
    while True:
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.read()
            with self.detectionLoader.Q.mutex:
                self.detectionLoader.Q.queue.clear()
            if boxes is None or boxes.nelement() == 0:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                continue
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            while self.Q.full():
                time.sleep(0.2)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))
def get_prediction(self, orig_img, boxes, scores, single_height, output_l):
    inp = im_to_torch(orig_img)
    inps = torch.zeros(boxes.size(0), 3, args.inputResH, args.inputResW)
    pt1 = torch.zeros(boxes.size(0), 2)
    pt2 = torch.zeros(boxes.size(0), 2)
    inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
    inps = Variable(inps.cuda())
    hm = self.pose_model(inps)
    if boxes is None:
        return False, hm
    else:
        preds_hm, preds_img, preds_scores = getPrediction(
            hm.cpu(), pt1, pt2, args.inputResH, args.inputResW,
            args.outputResH, args.outputResW)
        # result (after filtering by pose NMS): bbox, bbox_score, roi, keypoints, kp_score
        # (for both camera views)
        box, box_s, roi, kp, kp_s = pose_nms(boxes, scores, preds_img, preds_scores,
                                             single_height, orig_img, output_l)
        return True, (box, box_s, roi, kp, kp_s)
def update(self):
    # keep looping infinitely
    while True:
        # otherwise, ensure the queue has room in it
        if not self.Q.full():
            # read the next frame from the file
            (grabbed, frame) = self.stream.read()
            # if the `grabbed` boolean is `False`, then we have
            # reached the end of the video file
            if not grabbed:
                self.stop()
                return
            # process and add the frame to the queue
            inp_dim = int(opt.inp_dim)
            img, orig_img, dim = prep_frame(frame, inp_dim)
            inp = im_to_torch(orig_img)
            im_dim_list = torch.FloatTensor([dim]).repeat(1, 2)
            self.Q.put((img, orig_img, inp, im_dim_list))
        else:
            with self.Q.mutex:
                self.Q.queue.clear()
def update(self):
    # keep looping the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2) = self.detectionLoader.read()
            if orig_img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return
            if boxes is None or boxes.nelement() == 0:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                continue
            # inp is the input image; cv2 reads BGR, so convert it to an RGB tensor
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2, im_name)
            while self.Q.full():
                time.sleep(0.2)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))
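# --- Hedged sketch (illustration only, not from the original repo) ---
# The hand-rolled "while self.Q.full(): time.sleep(0.2)" pattern above is back-pressure on a
# bounded queue between the detection thread and the pose thread. A minimal standalone
# version of the same producer/consumer idea (all names here are hypothetical) is:
import queue
import threading
import time

Q = queue.Queue(maxsize=50)  # bounded buffer between the two pipeline stages

def producer():
    for item in range(200):
        Q.put(item)       # put() blocks when the queue is full, i.e. built-in back-pressure
    Q.put(None)           # sentinel, mirroring the (None, ..., None) tuples used above

def consumer():
    while True:
        item = Q.get()
        if item is None:  # sentinel -> end of stream
            break
        time.sleep(0.01)  # stand-in for pose estimation work

t_prod = threading.Thread(target=producer)
t_cons = threading.Thread(target=consumer)
t_prod.start()
t_cons.start()
t_prod.join()
t_cons.join()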
def update(self):
    # keep looping the whole video
    for i in range(self.num_batches):
        img = []
        inp = []
        orig_img = []
        im_name = []
        im_dim_list = []
        for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
            (grabbed, frame) = self.stream.read()
            # if the `grabbed` boolean is `False`, then we have
            # reached the end of the video file
            if not grabbed:
                self.stop()
                return
            # process and add the frame to the queue
            inp_dim = int(opt.inp_dim)
            img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
            inp_k = im_to_torch(orig_img_k)
            img.append(img_k)
            inp.append(inp_k)
            orig_img.append(orig_img_k)
            im_dim_list.append(im_dim_list_k)

        with torch.no_grad():
            ht = inp[0].size(1)
            wd = inp[0].size(2)
            # Human Detection
            img = Variable(torch.cat(img)).cuda()
            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
            im_dim_list = im_dim_list.cuda()

            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(inp)):
                    while self.Q.full():
                        time.sleep(0.2)
                    self.Q.put((inp[k], orig_img[k], None, None))
                continue
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

            boxes = dets[:, 1:5].cpu()
            scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))
def process(orig_img, boxes, scores, inps, pt1, pt2):
    with torch.no_grad():
        inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
        return (orig_img, boxes, scores, inps, pt1, pt2)
def detect_main(im_name, orig_img, det_model, pose_model, opt):
    args = opt
    mode = args.mode
    inp_dim = int(opt.inp_dim)
    dim = orig_img.shape[1], orig_img.shape[0]
    img_ = letterbox_image(orig_img, (inp_dim, inp_dim))
    img_ = img_[:, :, ::-1].transpose((2, 0, 1)).copy()
    img = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)

    img = [img]
    orig_img = [orig_img]
    im_name = [im_name]
    im_dim_list = [dim]

    with torch.no_grad():
        # Human Detection
        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    # DetectionLoader
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32

    with torch.no_grad():
        img = img.cuda()
        prediction = det_model(img, CUDA=True)  # a tensor

        boxes_chair = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 56)
        boxes_sofa = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 57)
        boxes_bed = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 59)

        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, 0,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            # no person detected
            return []

        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor

        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        boxes_k = boxes[dets[:, 0] == 0]
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            boxes = None
            scores = None
            inps = None
            pt1 = None
            pt2 = None
        else:
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            orig_img = orig_img[0]
            im_name = im_name[0]
            boxes = boxes_k
            scores = scores[dets[:, 0] == 0]

    # DetectionProcess
    with torch.no_grad():
        if boxes is None or boxes.nelement() == 0:
            pass
        else:
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)

    batchSize = args.posebatch
    for i in range(1):
        with torch.no_grad():
            if boxes is None or boxes.nelement() == 0:
                continue

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            hm = hm.cpu()

            fall_res = []
            keypoint_res = []
            if opt.matching:
                preds = getMultiPeakPrediction(hm, pt1.numpy(), pt2.numpy(),
                                               opt.inputResH, opt.inputResW,
                                               opt.outputResH, opt.outputResW)
                result = matching(boxes, scores.numpy(), preds)
            else:
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
                result = pose_nms(boxes, scores, preds_img, preds_scores)
            result = {'imgname': im_name, 'result': result}

            img = vis_frame(orig_img, result)
            for human in result['result']:
                keypoint = human['keypoints']
                kp_scores = human['kp_score']
                keypoint = keypoint.numpy()
                xmax = max(keypoint[:, 0])
                xmin = min(keypoint[:, 0])
                ymax = max(keypoint[:, 1])
                ymin = min(keypoint[:, 1])
                box_hm = [xmin, ymin, xmax, ymax]

                # require enough confidently detected keypoints
                kp_num = 0
                for i in range(len(kp_scores)):
                    if kp_scores[i] > 0.05:
                        kp_num += 1
                if kp_num < 10:
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    continue

                # skip people who largely overlap a chair / sofa / bed
                overlap = []
                for box in boxes_chair:
                    overlap.append(compute_overlap(box_hm, box))
                for box in boxes_sofa:
                    overlap.append(compute_overlap(box_hm, box))
                for box in boxes_bed:
                    overlap.append(compute_overlap(box_hm, box))
                if len(overlap) > 0 and max(overlap) >= 0.6:
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    continue

                w = xmax - xmin
                h = ymax - ymin
                ratio = w / h
                # head centre from eyes and ears
                xhead = (keypoint[1][0] + keypoint[2][0] + keypoint[3][0] + keypoint[4][0]) / 4
                yhead = (keypoint[1][1] + keypoint[2][1] + keypoint[3][1] + keypoint[4][1]) / 4
                xfeet = (keypoint[15][0] + keypoint[16][0]) / 2
                yfeet = (keypoint[15][1] + keypoint[16][1]) / 2
                d_ear = (abs(keypoint[3][0] - keypoint[4][0]) ** 2 +
                         abs(keypoint[3][1] - keypoint[4][1]) ** 2) ** 0.5
                r = (w ** 2 + h ** 2) ** 0.5 / d_ear
                if kp_scores[3] > 0.05 and kp_scores[4] > 0.05 and r < 4:
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    continue

                if min(kp_scores[3], kp_scores[4], kp_scores[15], kp_scores[16]) > 0.05 and \
                        yfeet < (keypoint[3][1] + keypoint[4][1]) / 2:
                    # feet above the ears: person is upside down / lying -> fall
                    fall_res.append([True, xmin, ymin, xmax, ymax])
                    keypoint_res.append(keypoint)
                elif w / h >= 1.0:
                    # wider than tall -> likely lying down -> fall
                    fall_res.append([True, xmin, ymin, xmax, ymax])
                    keypoint_res.append(keypoint)
                else:
                    fall_res.append([False, xmin, ymin, xmax, ymax])
            '''
            for box in boxes_chair:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 0), 2)
            for box in boxes_sofa:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 2)
            for box in boxes_bed:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 255), 2)
            cv2.imwrite('err_result/false/' + im_name[0:-4] + '_re.jpg', img)
            '''
    return keypoint_res
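# --- Hedged sketch (assumption, added for illustration) ---
# compute_overlap() is called in detect_main() above but its definition is not shown here.
# A plausible implementation, which the fall heuristic seems to rely on (a person lying
# mostly inside a chair / sofa / bed box should score near 1), is the intersection area
# normalised by the person-box area; the real function may differ.
def compute_overlap(box_a, box_b):
    """box = [xmin, ymin, xmax, ymax]; returns intersection(box_a, box_b) / area(box_a)."""
    ix = max(0.0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
    iy = max(0.0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
    area_a = max(1e-6, (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
    return (ix * iy) / area_a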
def update(self):
    time1 = time.time()
    _, frame = self.stream.read()
    # frame = cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2))
    # TODO TESTING
    # frame[:, :200, :] = 0
    # frame[:, 450:, :] = 0
    img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)
    img = [img_k]
    im_name = ["im_name"]
    im_dim_list = [im_dim_list_k]

    img = torch.cat(img)
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
    time2 = time.time()

    with torch.no_grad():
        ### detector #########################
        # Human Detection
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            self.visualize2dnoperson()
            return None
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]
        boxes_k = boxes[dets[:, 0] == 0]
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            self.visualize2dnoperson()
            raise NotImplementedError
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        time3 = time.time()

        ### processor #########################
        inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

        ### generator #########################
        self.orig_img = np.array(self.orig_img, dtype=np.uint8)
        # location prediction (n, kp, 2) | score prediction (n, kp, 1)
        datalen = inps.size(0)
        batchSize = 20  # args.posebatch
        leftover = 0
        if datalen % batchSize:
            leftover = 1
        num_batches = datalen // batchSize + leftover
        hm = []
        time4 = time.time()
        for j in range(num_batches):
            inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
            hm_j = self.pose_model(inps_j)
            hm.append(hm_j)
        hm = torch.cat(hm)
        hm = hm.cpu().data
        preds_hm, preds_img, preds_scores = getPrediction(
            hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
        result = pose_nms(boxes, scores, preds_img, preds_scores)
        time5 = time.time()

        if not result:  # No people
            self.visualize2dnoperson()
            return None
        else:
            self.kpt = max(result,
                           key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']),
                           )['keypoints']
            self.visualize2d()
            time6 = time.time()
            print("process time : {} ".format(time6 - time5))
            return self.kpt
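# --- Hedged sketch (not part of the original code) ---
# The pose-batching loop above (split `inps` into chunks of batchSize, run the pose model,
# concatenate the heatmaps) recurs in several of these snippets. Factored out under the
# assumption that `pose_model` is any nn.Module mapping (N, 3, H, W) crops to heatmaps:
import torch

def run_pose_in_batches(pose_model, inps, batch_size=20, device="cuda"):
    datalen = inps.size(0)
    num_batches = (datalen + batch_size - 1) // batch_size  # ceil division
    hm = []
    with torch.no_grad():
        for j in range(num_batches):
            inps_j = inps[j * batch_size:min((j + 1) * batch_size, datalen)].to(device)
            hm.append(pose_model(inps_j))
    return torch.cat(hm).cpu()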
def get_pose(self, img_names):
    if len(img_names) > 1:
        start_lc = 4000
        start_rc = 4000
        now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
        print('========START-Ten========')
        final_result = []
        vis_images = []
        height_difference = []
        for img_index in range(len(img_names)):
            print('--------------------')
            img_name = img_names[img_index]
            try:
                img, orig_img, im_name, im_dim_list = [], [], [], []
                inp_dim = int(self.args.inp_dim)
                im_name_k = img_name
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)
            except:
                print('index-{}: image has a problem'.format(img_index))
                final_result.append((None, None))
                continue

            with torch.no_grad():
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                dets = dynamic_write_results(prediction, self.args.confidence,
                                             self.args.num_classes, nms=True,
                                             nms_conf=self.args.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    print('index-{}: No person detected'.format(img_index))
                    final_result.append((None, None))
                    height_difference.append(None)
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

                k = 0
                boxes_k = boxes[dets[:, 0] == k]
                inps = torch.zeros(boxes_k.size(0), 3, self.args.inputResH, self.args.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                orig_img, im_name, boxes, scores, inps, pt1, pt2 = \
                    orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2

                inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)

                batchSize = self.args.posebatch
                datalen = inps.size(0)
                leftover = 0
                if datalen % batchSize:
                    leftover = 1
                num_batches = datalen // batchSize + leftover
                hm = []
                for j in range(num_batches):
                    inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                    hm_j = self.pose_model(inps_j)
                    hm.append(hm_j)
                hm = torch.cat(hm)
                hm_data = hm.cpu()

                orig_img = np.array(orig_img, dtype=np.uint8)
                im_name = im_name.split('/')[-1]
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm_data, pt1, pt2, self.args.inputResH, self.args.inputResW,
                    self.args.outputResH, self.args.outputResW)
                result = pose_nms(boxes, scores, preds_img, preds_scores)
                result = {'imgname': im_name, 'result': result}

                img = vis_frame(orig_img, result)
                vis_images.append(img)
                output_dir = os.path.join(self.args.outputpath, 'vis')
                output_dir_raw = os.path.join(self.args.outputpath, 'raw')
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                if not os.path.exists(output_dir_raw):
                    os.makedirs(output_dir_raw)

                # pick the person closest to the image centre
                width = img.shape[1]
                keypoints = [res['keypoints'][0] for res in result['result']]
                distance = [xy[0] - width / 2 for xy in keypoints]
                distance = torch.tensor([torch.abs(m) for m in distance])
                indice = torch.argsort(distance)[0]
                pose_result = result['result'][indice]['keypoints']
                # left_arm = pose_result[[6, 8, 10]].numpy()
                # right_arm = pose_result[[5, 7, 9]].numpy()
                # Keypoint order: ['Nose', 'LEye', 'REye', 'LEar', 'REar', 'LShoulder', 'RShoulder',
                #                  'LElbow', 'RElbow', 'LWrist', 'RWrist', 'LHip', 'RHip',
                #                  'LKnee', 'RKnee', 'LAnkle', 'RAnkle']
                left_arm = pose_result[[10]].numpy().astype(int)
                right_arm = pose_result[[9]].numpy().astype(int)
                left_arm_c_y = np.mean(left_arm, axis=0)[1]
                right_arm_c_y = np.mean(right_arm, axis=0)[1]
                # left_arm_c = tuple(np.mean(left_arm, axis=0).astype(int))
                # right_arm_c = tuple(np.mean(right_arm, axis=0).astype(int))
                left_arm_c = tuple(left_arm[0])
                right_arm_c = tuple(right_arm[0])
                hd = np.abs(left_arm_c_y - right_arm_c_y)
                height_difference.append(hd)

                cv2.circle(img, left_arm_c, 10, (0, 255, 0), -1, 8)
                cv2.circle(img, right_arm_c, 10, (0, 255, 0), -1, 8)
                log_vis_name = now_time + '-' + im_name
                cv2.imwrite(os.path.join(output_dir_raw, log_vis_name), orig_img)
                cv2.imwrite(os.path.join(output_dir, log_vis_name), img)

                if start_lc == 4000 and start_rc == 4000:
                    start_lc = left_arm_c_y
                    start_rc = right_arm_c_y
                    left_move = 0
                    right_move = 0
                else:
                    left_move = left_arm_c_y - start_lc
                    right_move = right_arm_c_y - start_rc

                print('index-{}--{}: left_c {:0f}, right_c {:0f}'.format(
                    img_index, im_name, left_arm_c_y, right_arm_c_y))
                print('index-{}--{}: start_lc {:0f}, start_rc {:0f}'.format(
                    img_index, im_name, start_lc, start_rc))
                print('index-{}--{}: left_move {:0f}, right_move {:0f}'.format(
                    img_index, im_name, left_move, right_move))
                print('index-{}--{}: height_difference {:0f}'.format(
                    img_index, im_name, hd))
                final_result.append((left_move, right_move))
        return final_result, vis_images, now_time, height_difference

    elif len(img_names) == 1:
        now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
        print('========START-One========')
        final_result = []
        vis_images = []
        height_difference = []
        for img_index in range(len(img_names)):
            img_name = img_names[img_index]
            try:
                img, orig_img, im_name, im_dim_list = [], [], [], []
                inp_dim = int(self.args.inp_dim)
                im_name_k = img_name
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)
            except:
                print('index-{}: image has a problem'.format(img_index))
                final_result.append((None, None))
            with torch.no_grad():
                img = torch.cat(img)
                vis_img = img.numpy()[0]
                vis_img = np.transpose(vis_img, (1, 2, 0))
                vis_img = vis_img[:, :, ::-1]
                vis_images.append(vis_img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                dets = dynamic_write_results(prediction, self.args.confidence,
                                             self.args.num_classes, nms=True,
                                             nms_conf=self.args.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    print('index-{}: No person detected'.format(img_index))
                    final_result.append((None, None))
                else:
                    print('index-{}: Person detected'.format(img_index))
                    final_result.append((4, 4))
        return final_result, vis_images, now_time, height_difference
def forward(self, Q_load, Q_det):
    # keep looping the whole dataset
    while True:
        # print(Q_load.qsize(), Q_det.qsize())
        img, orig_img, im_dim_list = Q_load.get()
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    if Q_det.full():
                        time.sleep(0.1)
                        # print("detectionloaderQ1 full")
                    # Q_det.put((orig_img[k], None, None, None, None, None))
                    Q_det.put((None, orig_img[k], None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2)
                if Q_det.full():
                    time.sleep(0.1)
                    # print("detectionloaderQ3 full")
                # Q_det.put((orig_img[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2))
                Q_det.put((inps, orig_img[k], boxes_k, scores[dets[:, 0] == k], pt1, pt2))
begin = time.time()
start_time = getTime()

frame_0 = fvs_0.read()
frame_1 = fvs_1.read()
single_height = frame_0.shape[0]
print(frame_0.shape)  # (432, 768, 3)

# pre-process
frame = np.concatenate([frame_0, frame_1], 0)
inp_dim = int(args.inp_dim)  # default=608
img, orig_img, dim = prep_frame(frame, inp_dim)
# print('img:', img.shape)            # torch.Size([1, 3, 608, 608])
# print('orig_img:', orig_img.shape)  # (864, 768, 3)
# print('dim', dim)                   # (768, 864)
inp = im_to_torch(orig_img)
im_dim_list = torch.FloatTensor([dim]).repeat(1, 2)
# print(im_dim_list)  # tensor([[768., 864., 768., 864.]])

ckpt_time, load_time = getTime(start_time)
runtime_profile['ld'].append(load_time)

with torch.no_grad():
    # human detection
    img = Variable(img).cuda()
    im_dim_list = im_dim_list.cuda()
    # ###################
    yolo_start = time.time()
    prediction = det_model(img, CUDA=True)
    yolo_end = time.time()
    _yolo_delta = yolo_end - yolo_start
def update(self):
    print(f'WebcamDetectionLoader_update_thread: {threading.currentThread().name}')
    # keep looping
    while True:
        img = []
        inp = []
        orig_img = []
        im_name = []
        im_dim_list = []
        for k in range(self.batchSize):
            (grabbed, frame) = self.stream.read()
            # check `grabbed` before touching the frame, otherwise frame may be None
            if not grabbed:
                continue
            h, w, c = frame.shape
            # frame = cv2.resize(frame, (int(w / 4), int(h / 4)), interpolation=cv2.INTER_CUBIC)
            # process and add the frame to the queue
            inp_dim = int(opt.inp_dim)
            img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
            inp_k = im_to_torch(orig_img_k)
            img.append(img_k)
            inp.append(inp_k)
            orig_img.append(orig_img_k)
            im_dim_list.append(im_dim_list_k)

        with torch.no_grad():
            ht = inp[0].size(1)
            wd = inp[0].size(2)
            # Human Detection
            img = Variable(torch.cat(img)).cuda()
            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
            im_dim_list = im_dim_list.cuda()

            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(inp)):
                    if self.Q.full():
                        with self.Q.mutex:
                            self.Q.queue.clear()
                    self.Q.put((inp[k], orig_img[k], None, None))
                continue
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

            boxes = dets[:, 1:5].cpu()
            scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                if self.Q.full():
                    with self.Q.mutex:
                        self.Q.queue.clear()
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))
def update(self):
    while True:
        (img, orig_img, im_name, im_dim_list) = self.dataloder.getitem()
        with self.dataloder.Q.mutex:
            self.dataloder.Q.queue.clear()
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # im_dim_list = im_dim_list.cuda()
            frame_id = int(im_name.split('.')[0])
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img, frame_id, None, None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            # Pose Estimation
            inp = im_to_torch(orig_img)
            inps = torch.zeros(boxes.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes.size(0), 2)
            pt2 = torch.zeros(boxes.size(0), 2)
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            inps = Variable(inps.cuda())
            hm = self.pose_model(inps)

            if boxes is None:
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img, frame_id, None, None, None, None, None))
                continue
            else:
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm.cpu(), pt1, pt2, opt.inputResH, opt.inputResW,
                    opt.outputResH, opt.outputResW)
                bbox, b_score, kp, kp_score, roi = pose_nms(
                    orig_img, boxes, scores, preds_img, preds_scores)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img, frame_id, bbox, b_score, kp, kp_score, roi))