def getitem_yolo(self):
    for i in range(self.num_batches):
        img = []
        orig_img = []
        im_name = []
        im_dim_list = []
        for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
            inp_dim = int(opt.inp_dim)
            im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')
            im_name_k = os.path.join(self.img_dir, im_name_k)
            img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
            # For data preprocessing: the dataset transform replaces the
            # letterboxed tensor returned by prep_image()
            img_k = self.transform(Image.open(im_name_k)).unsqueeze(0)
            img.append(img_k)
            orig_img.append(orig_img_k)
            im_name.append(im_name_k)
            im_dim_list.append(im_dim_list_k)

        with torch.no_grad():
            img = torch.cat(img)
            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        while self.Q.full():
            time.sleep(2)
        self.Q.put((img, orig_img, im_name, im_dim_list))
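# Every snippet on this page leans on prep_image(), which is not shown. Below
# is a minimal sketch (an assumption, not the real helper) of what it typically
# does in these YOLO pipelines: letterbox-resize to a square inp_dim, BGR->RGB,
# HWC->CHW, scale to [0, 1], and add a batch dimension.
import cv2
import numpy as np
import torch

def prep_image_sketch(img_path, inp_dim):
    """Assumed behaviour of prep_image(): returns (tensor, original image, (w, h))."""
    orig_im = cv2.imread(img_path)                      # BGR, HWC
    h, w = orig_im.shape[:2]
    scale = min(inp_dim / w, inp_dim / h)               # letterbox scale factor
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(orig_im, (new_w, new_h))
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)  # grey padding
    top, left = (inp_dim - new_h) // 2, (inp_dim - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    img = canvas[:, :, ::-1].transpose(2, 0, 1).copy()  # BGR->RGB, HWC->CHW
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    return img, orig_im, (w, h)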
def getitem_mtcnn(self):
    """Same as getitem_yolo(), but skips images that fail to load."""
    for i in range(self.num_batches):
        img = []
        orig_img = []
        im_name = []
        im_dim_list = []
        for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
            inp_dim = int(opt.inp_dim)
            im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')
            im_name_k = os.path.join(self.img_dir, im_name_k)
            try:
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
            except BaseException as e:
                print(im_name_k, e)
                continue
            img.append(img_k)
            orig_img.append(orig_img_k)
            im_name.append(im_name_k)
            im_dim_list.append(im_dim_list_k)

        with torch.no_grad():  # Human detection
            img = torch.cat(img)
            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        while self.Q.full():
            time.sleep(2)
        self.Q.put((img, orig_img, im_name, im_dim_list))
def detect_person(frame, model, inp_dim, confidence, num_classes, nms_thesh):
    img, orig_im, dim = prep_image(frame, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)
    im_dim = im_dim.cuda()
    img = img.cuda()

    with torch.no_grad():
        output = model(Variable(img), True)
    output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

    # write_results() returns an int when nothing is detected
    if type(output) == int:
        return []

    # Map box coordinates from the letterboxed network input back to the
    # original image: undo the padding offset, then the resize scaling.
    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    # Clamp boxes to the image boundaries
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

    return list(map(lambda x: write(x), output))
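# A worked example (with made-up numbers) of the inverse letterbox mapping used
# in detect_person() and the snippets below: a 416x416 network input and one
# 800x600 image, so the y axis carries (416 - 0.52*600)/2 = 52 px of padding.
import torch

inp_dim = 416
im_dim = torch.tensor([[800., 600.]])                  # (w, h) per detection row
scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)   # 416/800 = 0.52

box = torch.tensor([[0., 100., 150., 300., 250.]])     # [img_idx, x1, y1, x2, y2]
box[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2  # x pad = 0
box[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2  # y pad = 52
box[:, 1:5] /= scaling_factor
print(box)   # x1,x2 -> 100/0.52, 300/0.52; y1,y2 -> (150-52)/0.52, (250-52)/0.52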
def getitem_yolo(self, index):
    inp_dim = int(opt.inp_dim)
    im_name = self.imglist[index].rstrip('\n').rstrip('\r')
    im_name = os.path.join(self.img_dir, im_name)
    im, orig_img, im_dim = prep_image(im_name, inp_dim)
    # im_dim = torch.FloatTensor([im_dim]).repeat(1, 2)

    inp = load_image(im_name)
    return im, inp, orig_img, im_name, im_dim
def detect_one_img(self, img_name):
    """
    Detect bboxes in one image.

    Input: 'str', full path of the image
    Output: '[{"category_id":1,"score":float,"bbox":[x,y,w,h],"image_id":int},...]'
    The results follow the COCO detection format, except that image_id is
    parsed from the image filename stem instead of being a COCO %012d id.
    """
    args = self.detector_opt
    _CUDA = True
    if args:
        if args.gpus[0] < 0:
            _CUDA = False
    if not self.model:
        self.load_model()
    if isinstance(self.model, torch.nn.DataParallel):
        self.model = self.model.module
    dets_results = []
    # Pre-process (scale, normalize, ...) the image
    img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
    with torch.no_grad():
        img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
        img = img.to(args.device) if args else img.cuda()
        prediction = self.model(img, args=args)
        # Run NMS on the detections; only the human category is kept
        dets = self.dynamic_write_results(prediction, self.confidence,
                                          self.num_classes, nms=True,
                                          nms_conf=self.nms_thres)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return None
        dets = dets.cpu()

        # Map boxes from the letterboxed input back to original image coordinates
        img_dim_list = torch.index_select(img_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.inp_dim / img_dim_list, 1)[0].view(-1, 1)
        dets[:, [1, 3]] -= (self.inp_dim - scaling_factor * img_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.inp_dim - scaling_factor * img_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor
        for i in range(dets.shape[0]):
            dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0, img_dim_list[i, 0])
            dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0, img_dim_list[i, 1])

            # Write results in COCO [x, y, w, h] form
            det_dict = {}
            x = float(dets[i, 1])
            y = float(dets[i, 2])
            w = float(dets[i, 3] - dets[i, 1])
            h = float(dets[i, 4] - dets[i, 2])
            det_dict["category_id"] = 1
            det_dict["score"] = float(dets[i, 5])
            det_dict["bbox"] = [x, y, w, h]
            det_dict["image_id"] = int(os.path.basename(img_name).split('.')[0])
            dets_results.append(det_dict)

    return dets_results
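# Hypothetical usage of detect_one_img(); the class name, constructor, and
# option fields below are assumptions -- only the method itself comes from the
# snippet above.
from easydict import EasyDict

opt = EasyDict(gpus=[0], device='cuda:0')     # assumed detector_opt fields
detector = YOLODetector(cfg, opt)             # assumed constructor; cfg not shown

results = detector.detect_one_img('/path/to/000001.jpg')
if results is not None:
    for det in results:
        print(det['score'], det['bbox'])      # bbox is [x, y, w, h] in pixels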
def getitem_yolo(self, index):
    inp_dim = int(opt.inp_dim)
    im_name = self.imglist[index].rstrip('\n').rstrip('\r')
    im_name = os.path.join(self.img_dir, im_name)

    # For data preprocessing
    im, orig_img, im_dim = prep_image(im_name, inp_dim)
    im_dim = torch.FloatTensor([im_dim]).repeat(1, 2)

    inp = self.transform(Image.open(im_name))
    # inp = load_image(im_name)
    return im, inp, orig_img, im_name, im_dim
def image_preprocess(self, img_source):
    """
    Pre-process an image before it is fed to the object detection network.

    Input: image path (str) or raw image data (ndarray or torch.Tensor, BGR channel order)
    Output: pre-processed image data (torch.FloatTensor, (1, 3, h, w))
    """
    if isinstance(img_source, str):
        img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
    elif isinstance(img_source, (torch.Tensor, np.ndarray)):
        img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
    else:
        raise IOError('Unknown image source type: {}'.format(type(img_source)))
    return img
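# A quick sketch of calling image_preprocess() with both supported source
# types; `detector` is assumed to be an instance of the enclosing class.
import cv2

img1 = detector.image_preprocess('/path/to/frame.jpg')   # from a file path

frame = cv2.imread('/path/to/frame.jpg')                  # ndarray, BGR
img2 = detector.image_preprocess(frame)                   # from raw frame data

print(img1.shape)   # torch.Size([1, 3, h, w]) per the docstring above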
def cnv_img(Frame):
    img = []
    orig_img = []
    im_dim_list = []
    # for k in range(i*self.batchSize, min((i + 1)*self.batchSize, self.datalen)):
    inp_dim = int(opt.inp_dim)
    img_k, orig_img_k, im_dim_list_k = prep_image(Frame, inp_dim)
    img.append(img_k)
    orig_img.append(orig_img_k)
    im_dim_list.append(im_dim_list_k)

    with torch.no_grad():  # Human detection
        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    return img, orig_img, im_dim_list
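# Usage sketch for cnv_img(): wrap one capture frame in the same batched
# format the queue-based loaders above produce (assumes opt.inp_dim is set).
import cv2

cap = cv2.VideoCapture(0)            # webcam; any cv2 source works
ret, frame = cap.read()
if ret:
    img, orig_img, im_dim_list = cnv_img(frame)
    # img: (1, 3, inp_dim, inp_dim) letterboxed tensor for the detector
    # orig_img: [frame]; im_dim_list: (1, 4) FloatTensor of (w, h, w, h)
cap.release()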
def main():
    global confidence, nms_thesh, num_classes, classes

    # Load configuration
    model = erfnet.Net(11)
    model = torch.nn.DataParallel(model.cuda(), [0])
    model.eval()

    detect_model = detect_net.DetectionNetwork()

    # Inference: data loading
    matches = [
        '/home/vertensj/Documents/annotated_real_data/processed/images/track3_withCP/image_paths.txt'
    ]

    list_dataset_paths = []  # Stores pairs of consecutive image paths

    for f in matches:
        with open(f) as fh:
            content = fh.readlines()
        content = [x.strip() for x in content]
        for line in content:
            frames = line.split(" ")  # Current first, then previous
            list_dataset_paths.append(frames[0])

    # Image, Seg, Dt
    for path in list_dataset_paths:
        # Data preparation #####################################################
        image_prepared, org_img, dim = prep_image(path, detect_model.inp_dim)
        image_prepared = image_prepared.cuda()
        im_dim_list = [dim]
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2).cuda()

        # Detection inference ##################################################
        detect_output = detect_model.detect(image_prepared, im_dim_list)

        if detect_output is not None:
            detect_image = detect_model.visualize_outputs(detect_output, org_img)
            cv2.imshow('detect_image', detect_image)
            cv2.waitKey()
def get_pose(self, img_names):
    if len(img_names) > 1:
        start_lc = 4000
        start_rc = 4000
        now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
        print('========START-Ten========')
        final_result = []
        vis_images = []
        height_difference = []
        for img_index in range(len(img_names)):
            print('--------------------')
            img_name = img_names[img_index]
            try:
                img, orig_img, im_name, im_dim_list = [], [], [], []
                inp_dim = int(self.args.inp_dim)
                im_name_k = img_name
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)
            except Exception:
                print('index-{}: problem loading image'.format(img_index))
                final_result.append((None, None))
                continue

            with torch.no_grad():
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                img = img.cuda()

                # Human detection
                prediction = self.det_model(img, CUDA=True)
                dets = dynamic_write_results(prediction, self.args.confidence,
                                             self.args.num_classes, nms=True,
                                             nms_conf=self.args.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    print('index-{}: No person detected'.format(img_index))
                    final_result.append((None, None))
                    height_difference.append(None)
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)
                # Map boxes from the letterboxed input back to original image coordinates
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

                k = 0
                boxes_k = boxes[dets[:, 0] == k]
                inps = torch.zeros(boxes_k.size(0), 3, self.args.inputResH, self.args.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                orig_img, im_name, boxes, scores, inps, pt1, pt2 = (
                    orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2)

                # Crop person regions for the pose network
                inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)

                # Pose estimation in mini-batches
                batchSize = self.args.posebatch
                datalen = inps.size(0)
                leftover = 0
                if datalen % batchSize:
                    leftover = 1
                num_batches = datalen // batchSize + leftover
                hm = []
                for j in range(num_batches):
                    inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                    hm_j = self.pose_model(inps_j)
                    hm.append(hm_j)
                hm = torch.cat(hm)
                hm_data = hm.cpu()

                orig_img = np.array(orig_img, dtype=np.uint8)
                im_name = im_name.split('/')[-1]
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm_data, pt1, pt2, self.args.inputResH, self.args.inputResW,
                    self.args.outputResH, self.args.outputResW)
                result = pose_nms(boxes, scores, preds_img, preds_scores)
                result = {'imgname': im_name, 'result': result}
                img = vis_frame(orig_img, result)
                vis_images.append(img)

                output_dir = os.path.join(self.args.outputpath, 'vis')
                output_dir_raw = os.path.join(self.args.outputpath, 'raw')
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                if not os.path.exists(output_dir_raw):
                    os.makedirs(output_dir_raw)

                # Pick the person closest to the horizontal image centre
                width = img.shape[1]
                keypoints = [res['keypoints'][0] for res in result['result']]
                distance = [xy[0] - width / 2 for xy in keypoints]
                distance = torch.tensor([torch.abs(m) for m in distance])
                indice = torch.argsort(distance)[0]
                pose_result = result['result'][indice]['keypoints']
                # Keypoint order: ['Nose', 'LEye', 'REye', 'LEar', 'REar',
                #                  'LShoulder', 'RShoulder', 'LElbow', 'RElbow',
                #                  'LWrist', 'RWrist', 'LHip', 'RHip', 'LKnee',
                #                  'RKnee', 'LAnkle', 'RAnkle']
                # left_arm = pose_result[[6, 8, 10]].numpy()
                # right_arm = pose_result[[5, 7, 9]].numpy()
                left_arm = pose_result[[10]].numpy().astype(int)    # RWrist index
                right_arm = pose_result[[9]].numpy().astype(int)    # LWrist index
                left_arm_c_y = np.mean(left_arm, axis=0)[1]
                right_arm_c_y = np.mean(right_arm, axis=0)[1]
                # left_arm_c = tuple(np.mean(left_arm, axis=0).astype(int))
                # right_arm_c = tuple(np.mean(right_arm, axis=0).astype(int))
                left_arm_c = tuple(left_arm[0])
                right_arm_c = tuple(right_arm[0])
                hd = np.abs(left_arm_c_y - right_arm_c_y)
                height_difference.append(hd)

                cv2.circle(img, left_arm_c, 10, (0, 255, 0), -1, 8)
                cv2.circle(img, right_arm_c, 10, (0, 255, 0), -1, 8)
                log_vis_name = now_time + '-' + im_name
                cv2.imwrite(os.path.join(output_dir_raw, log_vis_name), orig_img)
                cv2.imwrite(os.path.join(output_dir, log_vis_name), img)

                # Track wrist movement relative to the first frame
                if start_lc == 4000 and start_rc == 4000:
                    start_lc = left_arm_c_y
                    start_rc = right_arm_c_y
                    left_move = 0
                    right_move = 0
                else:
                    left_move = left_arm_c_y - start_lc
                    right_move = right_arm_c_y - start_rc
                print('index-{}--{}: left_c {:0f}, right_c {:0f}'.format(
                    img_index, im_name, left_arm_c_y, right_arm_c_y))
                print('index-{}--{}: start_lc {:0f}, start_rc {:0f}'.format(
                    img_index, im_name, start_lc, start_rc))
                print('index-{}--{}: left_move {:0f}, right_move {:0f}'.format(
                    img_index, im_name, left_move, right_move))
                print('index-{}--{}: height_difference {:0f}'.format(
                    img_index, im_name, hd))
                final_result.append((left_move, right_move))
        return final_result, vis_images, now_time, height_difference

    elif len(img_names) == 1:
        now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
        print('========START-One========')
        final_result = []
        vis_images = []
        height_difference = []
        for img_index in range(len(img_names)):
            img_name = img_names[img_index]
            try:
                img, orig_img, im_name, im_dim_list = [], [], [], []
                inp_dim = int(self.args.inp_dim)
                im_name_k = img_name
                img_k, orig_img_k, im_dim_list_k = prep_image(im_name_k, inp_dim)
                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)
            except Exception:
                print('index-{}: problem loading image'.format(img_index))
                final_result.append((None, None))
                continue

            with torch.no_grad():
                img = torch.cat(img)
                vis_img = img.numpy()[0]
                vis_img = np.transpose(vis_img, (1, 2, 0))
                vis_img = vis_img[:, :, ::-1]   # RGB -> BGR for OpenCV
                vis_images.append(vis_img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                dets = dynamic_write_results(prediction, self.args.confidence,
                                             self.args.num_classes, nms=True,
                                             nms_conf=self.args.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    print('index-{}: No person detected'.format(img_index))
                    final_result.append((None, None))
                else:
                    print('index-{}: Person detected'.format(img_index))
                    final_result.append((4, 4))
        return final_result, vis_images, now_time, height_difference
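# Hedged usage of get_pose(): `estimator` is assumed to be an instance of the
# enclosing class with det_model and pose_model already loaded.
moves, vis_images, stamp, hdiff = estimator.get_pose(['f0.jpg', 'f1.jpg', 'f2.jpg'])
for i, (left_move, right_move) in enumerate(moves):
    if left_move is None:
        print('frame {}: no person / unreadable image'.format(i))
    else:
        print('frame {}: wrists moved ({:+.1f}, {:+.1f}) px vertically'.format(
            i, left_move, right_move))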
def main():
    global args
    args = parser.parse_args()

    # YOLO
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.config_file)
    model.load_weights(args.weights_file)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    # Connect to the MQTT broker
    client = paho.Client()
    host, port = args.broker_url.split(':')
    client.connect(host, int(port))

    # Subscribe to system messages
    client.message_callback_add("$SYS/#", system_message)
    client.subscribe("$SYS/#")

    # Open the RTSP stream
    cap = cv2.VideoCapture(args.input_url)
    assert cap.isOpened(), 'Cannot capture source {}'.format(args.input_url)

    # Inspect the input stream
    input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    print("[input stream] width: {}, height: {}, fps: {}".format(
        input_width, input_height, input_fps))

    # Open the output stream
    output_command = stream_factory(args.output_url, input_width, input_height, input_fps)
    print(output_command)
    output_stream = sp.Popen(output_command, stdin=sp.PIPE, stderr=sp.PIPE)

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()  # frame size: 640x360x3 (=691200)
        if ret:
            # Detection on the frame happens here
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # Map boxes to frame coordinates; note this simple clamp-and-scale
            # ignores any letterbox padding
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('yolo/data/coco.names')
            colors = pkl.load(open("yolo/pallete", "rb"))

            # Overlay boxes on screen
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            # Send the bboxes
            # Display the resulting frame
            cv2.imshow("frame", orig_im)
            frames += 1
            print("FPS of the video is {:5.2f}, size: {}".format(
                frames / (time.time() - start), orig_im.size))

            # Write to the RTMP stream
            output_stream.stdin.write(frame.tobytes())
        else:
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Close
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
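# stream_factory() is not shown above; a plausible (assumed) implementation
# builds an ffmpeg command that reads raw BGR frames from stdin and publishes
# them to the RTMP output URL, matching the sp.Popen/stdin.write pattern:
def stream_factory_sketch(output_url, width, height, fps):
    return ['ffmpeg', '-y',
            '-f', 'rawvideo', '-pix_fmt', 'bgr24',
            '-s', '{}x{}'.format(int(width), int(height)),
            '-r', str(fps),
            '-i', '-',                       # raw frames arrive on stdin
            '-c:v', 'libx264', '-preset', 'ultrafast',
            '-f', 'flv', output_url]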
if CUDA:
    model.cuda()
model.eval()

cap = cv2.VideoCapture(args.video)
assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        output = model(img)
        output = sift_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))