def display_results(mode, in_image, data_2d, visibilities, data_3d):
    """Plot 2D and 3D poses for each of the people in the image."""
    n_poses = len(data_3d)
    if mode == 'openpose':
        color_im = OpPoseEstimator.draw_humans(in_image, data_2d, imgcopy=False)
    else:
        draw_limbs(in_image, data_2d, visibilities)
    plt.subplot(1, n_poses + 1, 1)
    plt.imshow(in_image)
    plt.axis('off')
    # Show 3D poses
    for idx, single_3D in enumerate(data_3d):
        plot_pose(Prob3dPose.centre_all(single_3D),
                  visibility_to_3d(visibilities[idx]),
                  n_poses + 1, idx + 2)
def infer(self, image):
    """Call the inference API inside tf_pose (openpose), returning the poses
    of humans and drawing the skeleton on the image frame."""
    self.image = image
    if self.image is None:
        raise Exception('The image is not valid. Check your image.')
    self.humans = self.e.inference(self.image,
                                   resize_to_default=(self.w > 0 and self.h > 0),
                                   upsample_size=self.resize_out_ratio)
    self.image = TfPoseEstimator.draw_humans(self.image, self.humans, imgcopy=False)
    return self.image
def get_pose_estimation(estimator, image, resolution, resize_ratio=4.0,
                        show_background=False):
    w, h = resolution
    humans_estimator = estimator.inference(image,
                                           resize_to_default=(w > 0 and h > 0),
                                           upsample_size=resize_ratio)
    if not show_background:
        # Create a blank white canvas for the skeleton. Note that NumPy image
        # shapes are (height, width, channels), so h comes first.
        image = np.zeros((h, w, 3), np.uint8)
        image[:, :] = (255, 255, 255)
    # Draw the pose estimator's skeleton
    image = TfPoseEstimator.draw_humans(image, humans_estimator, imgcopy=False)
    return image, humans_estimator
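# --- A hedged usage sketch (not from the original sources) for
# get_pose_estimation above. Estimator construction follows the pattern used
# in the other snippets here; the image path is hypothetical.
if __name__ == "__main__":
    import cv2
    estimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(432, 368))
    img = cv2.imread("person.jpg")  # hypothetical input image
    # Returns the rendered skeleton (on a blank canvas by default) plus humans
    skeleton, humans = get_pose_estimation(estimator, img, (432, 368))
    cv2.imwrite("skeleton.jpg", skeleton)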
def run(image_name):
    w, h = model_wh('1280x720')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))
    logger.debug('image process+')
    image = common.read_imgfile('./img/' + image_name, None, None)
    if image is None:
        logger.error('Image can not be read, path=%s' % image_name)
        sys.exit(-1)
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
    logger.debug('postprocess+')
    # From here the pose keypoints are written onto the image; an image with
    # the points and lines added is returned.
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.imwrite('./img/' + image_name, image)
def draw(self, img_disp, humans):
    """Draw human skeletons on img_disp in place.

    Arguments:
        img_disp {RGB image}
        humans {a class returned by self.detect}
    """
    img_disp = TfPoseEstimator.draw_humans(img_disp, humans, imgcopy=False)
    if IS_DRAW_FPS:
        cv2.putText(
            img_disp,
            "fps = {:.1f}".format(1.0 / (time.time() - self._prev_t)),
            (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1,
        )
    self._prev_t = time.time()
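# --- A minimal sketch (not from the original sources) of the FPS-overlay
# pattern used in draw() above and in several snippets below, factored into a
# standalone helper. It assumes only OpenCV and time; the class and method
# names are hypothetical.
import time

import cv2


class FpsDrawer:
    """Stamp the instantaneous frames-per-second onto an image in place."""

    def __init__(self):
        self._prev_t = time.time()

    def draw(self, img):
        now = time.time()
        dt = now - self._prev_t
        if dt > 0:  # guard against the ZeroDivisionError handled elsewhere
            cv2.putText(img, "fps = {:.1f}".format(1.0 / dt),
                        (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
        self._prev_t = now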
def pose_start(self):
    print("Pose Start")
    result = False
    while True:
        ret, frame = self.cam.read()
        cropped_img = None
        if len(self.right_clicks) == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # print(self.right_clicks)
            _y, _x, _d = frame.shape
            [_c1, _c2] = self.right_clicks
            crop_box = [
                _c1[0] / _x, _c1[1] / _y,
                _c2[0] / _x, _c2[1] / _y,
            ]
            cropped_img = crop_img(frame, crop_box)
            humans = self.e.inference(frame,
                                      resize_to_default=(self.w > 0 and self.h > 0),
                                      upsample_size=4.0)
            if len(humans) > 0:
                # Parts 4 and 7 are the right and left wrists in the COCO layout
                if 7 in humans[0].body_parts or 4 in humans[0].body_parts:
                    print("Hands Detected")
                    result = True
                    break
            image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
            cv2.imshow('tf-pose-estimation result', image)
        cv2.imshow(self.window_name, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    return result
def openpose(image, model='cmu', resize='0x0', resize_out_ratio=4.0):
    w, h = model_wh(resize)
    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

    # estimate human poses from a single image!
    if image is None:
        logger.error('Image can not be read, path=%s' % image)
        sys.exit(-1)
    t = time.time()
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                         upsample_size=resize_out_ratio)
    elapsed = time.time() - t

    # file save
    human_models = []
    for human in humans:
        human_model = {}
        # collect each detected body part (separate index so the outer loop
        # variable is not shadowed, as it was in the original)
        for i in range(common.CocoPart.Background.value):
            if i not in human.body_parts.keys():
                continue
            body_part = human.body_parts[i]
            human_model[i] = [body_part.x, body_part.y, body_part.score]
        human_models.append(human_model)
    print('human_models')
    print(human_models)
    if human_models:  # guard against frames with no detections
        write_csv('uncho.csv', human_models[0])
    print(type(humans))
    print('---------------humans----------------')
    print(humans)  # humans has one element per detected person
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # time.time() is a float, so format it into the filename rather than
    # concatenating it with a string
    cv2.imwrite("output_img%d.jpg" % time.time(), image)
    run_load_human_model.add_label()
class SkeletonImplement:
    def __init__(self, model="mobilenet_thin", target_size=(368, 368),
                 tf_config=None, adjust_nose_position=False):
        self.estimator = TfPoseEstimator(get_graph_path(model),
                                         target_size=target_size,
                                         tf_config=tf_config)
        self.adjust_nose_position = adjust_nose_position

    def infer_skeleton(self, src):
        humans = self.estimator.inference(src, upsample_size=4.0)
        if len(humans) == 0:
            return None
        human = humans[0]

        # adjust nose position to the midpoint of the ears
        if 16 in human.body_parts and 17 in human.body_parts:
            right_ear, left_ear = human.body_parts[16], human.body_parts[17]
            nose_x = (right_ear.x + left_ear.x) / 2
            nose_y = (right_ear.y + left_ear.y) / 2
            if 0 in human.body_parts:
                human.body_parts[0].x = nose_x
                human.body_parts[0].y = nose_y
            else:
                human.body_parts[0] = BodyPart("0-0", 0, nose_x, nose_y, 0.5)

        # skip frames where a wrist is under the neck
        if 1 in human.body_parts and 4 in human.body_parts and 7 in human.body_parts:
            neck_y = human.body_parts[1].y
            right_wrist_y = human.body_parts[4].y
            left_wrist_y = human.body_parts[7].y
            if right_wrist_y > neck_y or left_wrist_y > neck_y:
                print("Wrists are under neck.")
                return None

        # skip unused joints
        for unused_index in [14, 15, 16, 17, 18]:
            if unused_index in human.body_parts.keys():
                del human.body_parts[unused_index]

        if self.adjust_nose_position:
            nose, neck = human.body_parts[0], human.body_parts[1]
            if abs(nose.y - neck.y) / (abs(nose.x - neck.x) + 1e-4) < 5:
                human.body_parts[0].x = neck.x
        return human

    def draw_skeleton(self, img, human):
        return self.estimator.draw_humans(img, [human], imgcopy=False)
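# --- A hedged usage sketch (not from the original sources) for
# SkeletonImplement above. The image path is hypothetical, and it assumes the
# tf-pose graph files are available via get_graph_path.
if __name__ == "__main__":
    import cv2

    skel = SkeletonImplement(model="mobilenet_thin", adjust_nose_position=True)
    frame = cv2.imread("sample.jpg")  # hypothetical input image
    human = skel.infer_skeleton(frame)
    if human is not None:
        annotated = skel.draw_skeleton(frame, human)
        cv2.imwrite("sample_skeleton.jpg", annotated)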
def detect_pose(name, pose, response, config, complex_pose_438, pid):
    start = time.time()
    images_path = [os.path.join(config["images_path"], name)]
    if len(images_path) == 0:
        raise Exception("no image with name {} found in {}".format(
            name, config["images_path"]))
    # image_sizes = []
    for path in images_path:
        image = common.read_imgfile(path, None, None)
        humans = pose.inference(image, resize_to_default=True, upsample_size=4.0)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # image_sizes.append(os.path.getsize(path) * 8)
    computation_time = time.time() - start
    cpu = psutil.cpu_percent() / 100
    complex_pose_438.append(computation_time * cpu * 2.8)
    print("\tpose + {} finished in {}s, system response in {} s, cpu in {} cycles(10^9)"
          .format(round(cpu, 3), round(computation_time, 4),
                  round(np.average(response), 4),
                  round(np.average(complex_pose_438), 3)))
    return computation_time, cpu * 100, complex_pose_438
def infinite_loop(self):
    while True:
        self.reset_params()
        self.get_humans()
        if not self.humans:
            continue
        self.choose_best_human()
        if not self.human:
            continue
        self.get_hands()
        self.calculate_hands_direction()
        self.calculate_pose()

        # Draw
        image = TfPoseEstimator.draw_humans(self.image, [self.human], imgcopy=False)
        # resize
        image = cv2.resize(image, None, fx=self.draw_resize, fy=self.draw_resize)
        # Draw angles and pose
        image = self._draw_angle(image)
        # image = self._draw_hand_direction(image)
        # image = self._draw_wrist_position(image)
        image = self._draw_pose(image)
        image = self._draw_cmd(image)
        image = self._draw_prev_cmd(image)
        # image = self._draw_fps(image)
        cv2.imshow('Result', image)

        if self.pose:
            logger.info('Pose {}, {} '.format(
                self.pose, self.poses[self.pose]['desc']))
            self.send_pose2drone()
        if cv2.waitKey(1) == 27:
            break
    cv2.destroyAllWindows()
def draw_humans(self, humans, frame):
    # this portion does not cause the bottleneck
    frame = TfPoseEstimator.draw_humans(frame, humans)
    # if len(humans) > 0:
    #     frame = self.identify_body_gestures(frame, humans)
    try:
        cv2.putText(frame, "FPS: %f" % (1.0 / (time.time() - self.received_fps)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        # logger.info("fps received %s" % (1.0 / (time.time() - self.received_fps)))
    except ZeroDivisionError:
        logger.error("FPS division error")
    self.received_fps = time.time()
    self.frame_processed_queue.put(frame)
def get_frame(self, pic):
    success, image = self.video.read()
    if pic == 'non':
        ret, jpeg = cv2.imencode('.jpg', image)
        return jpeg.tobytes()

    # pose detection (note: building the estimator per frame is expensive)
    w, h = model_wh('0x0')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
    image_h, image_w = image.shape[:2]
    if pic == 'pose':
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        ret, jpeg = cv2.imencode('.jpg', image)
        return jpeg.tobytes()

    print('pic', pic)
    ebi = cv2.imread('./static/icon/' + pic + '.png')
    # ebi = cv2.imread('./static/icon/ebi.png')
    ebi_h, ebi_w = ebi.shape[:2]
    for human in humans:
        if 0 not in human.body_parts.keys():
            continue
        body_part = human.body_parts[0]  # part 0 is the nose
        # Integer-divide so the corner coordinates stay integers
        center_1 = int(body_part.x * image_w + 0.5) - ebi_w // 2
        center_2 = int(body_part.y * image_h + 0.5) - ebi_h // 2
        for i in range(0, ebi_h):
            for j in range(0, ebi_w):
                # copy only non-black icon pixels, clipped to the frame
                if ebi[i][j][0] != 0 or ebi[i][j][1] != 0 or ebi[i][j][2] != 0:
                    xx = int(center_2 + i)
                    yy = int(center_1 + j)
                    if 0 <= xx < image_h and 0 <= yy < image_w:
                        image[xx][yy] = ebi[i][j]
    # We are using Motion JPEG, but OpenCV defaults to capturing raw images,
    # so we must encode into JPEG in order to correctly display the
    # video stream.
    ret, jpeg = cv2.imencode('.jpg', image)
    return jpeg.tobytes()
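# --- A sketch of a vectorized alternative to the per-pixel icon copy above,
# using a NumPy boolean mask instead of nested Python loops. The names mirror
# the snippet (image, ebi, center_1, center_2); for simplicity this version
# assumes the icon lies fully inside the frame, unlike the clipped loop.
import numpy as np


def paste_icon(image, ebi, center_1, center_2):
    h, w = ebi.shape[:2]
    row0, col0 = int(center_2), int(center_1)  # top-left corner of the icon
    roi = image[row0:row0 + h, col0:col0 + w]  # view into the frame
    mask = ebi.any(axis=2)                     # True where the icon is non-black
    roi[mask] = ebi[mask]                      # copy icon pixels onto the frame
    return image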
def draw_pose(self, img, humans, parts=[4, 7]):
    if img is None or humans is None:
        # print("Cannot draw pose on {} image and {} humans".format(img, humans))
        return None
    image_h, image_w, _ = img.shape
    if parts is None or len(parts) == 0:
        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
    for human in humans:
        for part in parts:
            if part in human.body_parts.keys():
                body_part = human.body_parts[part]
                coord = (int(body_part.x * image_w + 0.5),
                         int(body_part.y * image_h + 0.5))
                img = cv2.circle(img, coord, 2, (0, 255, 0))
    return img
def get_points_from_image(image_path):
    w, h = model_wh(RESIZE)
    # load image
    image = common.read_imgfile(image_path, None, None)
    if image is None:
        raise FileNotFoundError('Image not found')
    # configure the estimator
    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(MODEL), target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path(MODEL), target_size=(w, h))
    # get points
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                         upsample_size=RESIZE_OUT_RATIO)
    # Note: despite this function's name, draw_humans returns the annotated
    # image, not raw keypoints.
    data = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    return data
def analyze(self):
    args = parserSetup.parserSetup()
    w, h = model_wh(args.resize)
    e = model(w, h, args)
    pos = position()
    # cam = cv2.VideoCapture('C:/Users/Eamonn/Programming/2020-ca400-template-repo/src/GymVisionDesktop/Videos/SkullCrushers.mp4')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    orange_color = (0, 140, 255)
    while True:
        ret_val, image = cam.read()
        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        pose = humans
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        if len(pose) > 0:
            # distance calculations
            for human in humans:
                for i in range(len(humans)):
                    try:
                        pos.getPositions(human, image)
                        self.upperArmPerpendicular(human, image, pos)
                        self.highEnough(human, image, pos)
                    except Exception as exs:
                        print(exs)
        cv2.imshow('tf-pose-estimation result', image)
        if cv2.waitKey(1) == 27:
            break
    cv2.destroyAllWindows()
def play(args):
    in_data = {}
    in_file = getFileName(args.video)
    data_file = ''.join([in_file, '_', args.model])
    data_file_path = os.path.join(args.output, data_file)
    if not os.path.exists(data_file_path):
        logger.error(
            "No pose data for the video. Run with record flag to record pose data."
        )
        sys.exit(-1)
    with open(data_file_path, 'rb') as f:
        in_data = pickle.load(f)

    frame_idx = 0
    cap = cv2.VideoCapture(args.video)
    if cap.isOpened() is False:
        print("Error opening video stream or file.")
    while cap.isOpened():
        ret, image = cap.read()
        if image is None:
            logger.error('Image can not be read, path=%s' % args.video)
            sys.exit(-1)
        if frame_idx in in_data.keys():
            humans = in_data[frame_idx]
            if not args.showBG:
                image = np.zeros(image.shape)
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            cv2.imshow('tf-pose-estimation-stub', image)
        frame_idx += 1
        if cv2.waitKey(24) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def show_camera(self):
    flag, self.image = self.cap.read()
    if flag:
        humans = poseModel.inference(self.image,
                                     resize_to_default=(WIDTH > 0 and HEIGHT > 0),
                                     upsample_size=4.0)
        if Ui_MainWindow.isPafX:
            print("Now show the pafX")
            self.image = poseModel.pafMat
            self.image = np.amax(np.absolute(self.image[:, :, ::2]), axis=2)
            show = convertFloat(self.image)
            print(self.image)
            show = cv2.cvtColor(show, cv2.COLOR_GRAY2RGB)
            print(show)
        elif Ui_MainWindow.isPafY:
            self.image = poseModel.pafMat
            self.image = np.amax(np.absolute(self.image[:, :, 1::2]), axis=2)
            show = convertFloat(self.image)
            show = cv2.cvtColor(show, cv2.COLOR_GRAY2RGB)
        elif Ui_MainWindow.isHeat:
            self.image = poseModel.heatMat
            self.image = np.amax(np.absolute(self.image[:, :, :-1]), axis=2)
            show = convertFloat(self.image)
            show = cv2.cvtColor(show, cv2.COLOR_GRAY2RGB)
        else:
            self.image = TfPoseEstimator.draw_humans(self.image, humans, imgcopy=False)
            # show = cv2.resize(self.image, (640, 480))
            show = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
        show = cv2.resize(show, (640, 480))
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QtGui.QImage.Format_RGB888)
        self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))
    else:
        self.closeVideo()
def hardcode_dances(video_file):
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print("Error opening video stream/file")
        return
    e = TfPoseEstimator(get_graph_path('mobilenet_v2_large'), (720, 480))
    while cap.isOpened():
        ret_val, image = cap.read()
        if not ret_val:  # end of stream
            break
        humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        for human in humans:
            parts_dict = human.body_parts
            wrists_y = []
            shoulders_y = []
            for k, v in parts_dict.items():
                part_name = str(v.get_part_name())
                if 'Wrist' in part_name or 'Elbow' in part_name:
                    wrists_y.append(v.y)
                elif 'Shoulder' in part_name or 'Neck' in part_name:
                    shoulders_y.append(v.y)
            # If all wrists are higher up than all shoulders, yield "nae-nae";
            # otherwise yield "gangnam".
            if len(wrists_y) == 0 or len(shoulders_y) == 0:
                yield "nothing"
                break
            wrists_y.sort()
            shoulders_y.sort()
            if wrists_y[-1] < shoulders_y[0]:
                yield "nae-nae"
                break
            else:
                yield "gangnam"
                break
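# --- A small sketch (not from the original sources) restating the comparison
# above as a standalone predicate. In image coordinates y grows downward, so
# "all wrists above all shoulders" means every wrist y is smaller than every
# shoulder y.
def wrists_above_shoulders(wrists_y, shoulders_y):
    return bool(wrists_y) and bool(shoulders_y) and max(wrists_y) < min(shoulders_y)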
def run(self):
    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h),
                            trt_bool=str2bool(args.tensorrt))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368),
                            trt_bool=str2bool(args.tensorrt))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
    while True:
        ret_val, image = cam.read()
        logger.debug('image process+')
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        logger.debug('postprocess+')
        # Note: this draw_humans variant takes the exercise name and also
        # returns the joints and centers (a project-specific modification).
        image, joints, centers = TfPoseEstimator.draw_humans(args.exercise, image,
                                                             humans, imgcopy=False)
        # print(joints)
        self.change_data_signal.emit(joints, centers)
        logger.debug('show+')
        # image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
        cv2.imshow('tf-pose-estimation result', image)
        # fps_time = time.time()
        # cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
        #             (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        if cv2.waitKey(1) == 27:
            break
def pose_cb(self, data):
    if self.color_img is None:
        return
    h, w = self.color_img.shape[:2]
    # convert the ROS topic message into Human instances
    humans = []
    for p_idx, person in enumerate(data.persons):
        human = Human([])
        for body_part in person.body_part:
            part = BodyPart('', body_part.part_id, body_part.x, body_part.y,
                            body_part.confidence)
            human.body_parts[body_part.part_id] = part
        humans.append(human)
    self.humans = humans
    self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans,
                                                       imgcopy=False)
def _worker_th(self, frame, frame_no):
    logger.debug("worker processing")
    humans = self.tf_op.inference(frame,
                                  resize_to_default=(self.w > 0 and self.h > 0),
                                  upsample_size=4.0)
    pose = ''
    if len(humans) > 0:
        humans.sort(key=lambda x: x.score, reverse=True)
        humans = humans[:1]  # get the human with the highest score
        frame = TfPoseEstimator.draw_humans(frame, humans)
        frame, pose = PoseAppWKinesis.identify_body_gestures(frame, humans[0])
    frame_package = {'frame_no': frame_no, 'frame': frame, 'pose': pose}
    partition_key = random.randint(1, self.n_shards)
    self.client.put_record(StreamName=self.result_stream,
                           Data=pickle.dumps(frame_package),
                           PartitionKey=str(partition_key))
    logger.debug("Sent frame package to part {}".format(partition_key))
    logger.debug("worker finish")
def infer(image, model='cmu', resize='0x0', resize_out_ratio=4.0):
    """
    :param image: path to the image file
    :param model: model name passed to get_estimator
    :param resize: network input resolution, e.g. '432x368' ('0x0' keeps the default)
    :param resize_out_ratio: heatmap upsample ratio
    :return: list of (coco_style_keypoints, score) tuples, one per detected human
    """
    w, h = model_wh(resize)
    e = get_estimator(model, resize)
    # estimate human poses from a single image!
    image_path = image
    image = common.read_imgfile(image_path, None, None)
    if image is None:
        raise Exception('Image can not be read, path=%s' % image_path)
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                         upsample_size=resize_out_ratio)
    image_h, image_w = image.shape[:2]
    if "TERM_PROGRAM" in os.environ and 'iTerm' in os.environ["TERM_PROGRAM"]:
        # render the annotated image inline when running inside iTerm2
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        image_str = cv2.imencode(".jpg", image)[1].tobytes()
        print("\033]1337;File=name=;inline=1:" +
              base64.b64encode(image_str).decode("utf-8") + "\a")
    return [(eval.write_coco_json(human, image_w, image_h), human.score)
            for human in humans]
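# --- A minimal usage sketch (not from the original sources) for infer()
# above; the image path is hypothetical. Each element pairs the COCO-style
# keypoints produced by eval.write_coco_json with the detection score.
if __name__ == "__main__":
    for keypoints, score in infer("person.jpg", model="mobilenet_thin"):
        print("score={:.3f}".format(score), keypoints)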
args = parser.parse_args()
logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
cap = cv2.VideoCapture(args.video)
if cap.isOpened() is False:
    print("Error opening video stream or file")
fps_time = time.time()  # initialize before first use in the FPS overlay
while cap.isOpened():
    ret_val, image = cap.read()
    image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    image = cv2.resize(image, (720, 1280))
    humans = e.inference(image)
    if not args.showBG:
        image = np.zeros(image.shape)
    image = e.draw_humans(image, humans, imgcopy=False)
    cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('tf-pose-estimation result', image)
    fps_time = time.time()
    if cv2.waitKey(1) == 27:
        break
cv2.destroyAllWindows()
logger.debug('finished+')
b.append(num_2[index])
b.append(num_3[index])
b.append(num_4[index])
b.append(num_5[index])
b.append(num_6[index])
b.append(num_7[index])
c = []
c.append(num_8[index])
c.append(num_9[index])
c.append(num_10[index])
c.append(num_11[index])
c.append(num_12[index])
c.append(num_13[index])
index += 1
image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
L2_norm1 = []  # upper body
L2_norm2 = []  # lower body
L2_nonzero1 = []
L2_nonzero2 = []
for i in range(7):
    try:
        L2_norm1.append(np.linalg.norm(np.array(a[i + 1]) - np.array(b[i]), ord=2))
    except Exception:
        L2_norm1.append(0.0)
    # compare floats with != rather than `is not`, which tests identity
    if L2_norm1[i] != 0.0:
        L2_nonzero1.append(L2_norm1[i])
    else:
def analyze(self):
    args = parserSetup.parserSetup()
    w, h = model_wh(args.resize)
    e = model(w, h, args)
    pos = position()
    frameCount = 1
    kneeCheckCount = 0
    hipcheckCount = 0
    footCheckCount = 0
    kneeCheckFailed = False
    footCheckFailed = False
    hipCheckPassed = False
    # cam = cv2.VideoCapture('C:/Users/Eamonn/Programming/2020-ca400-template-repo/src/GymVisionDesktop/Videos/SquatBad2.mp4')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    orange_color = (0, 140, 255)
    while True:
        ret_val, image = cam.read()
        frameCount += 1
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        pose = humans
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        if len(pose) > 0:
            # distance calculations
            for human in humans:
                for i in range(len(humans)):
                    try:
                        pos.getPositions(human, image)
                        if footCheckCount < 5:
                            if self.feetSquareCheck(human, image, pos):
                                footCheckCount += 1
                                if footCheckCount == 5:
                                    footCheckFailed = True
                                    footCheckCount += 1
                        if footCheckFailed:
                            cv2.putText(image, "Feet were not Square", (5, 50),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
                        if kneeCheckCount < 5:
                            if self.kneeCheck(human, image, pos):
                                kneeCheckCount += 1
                                if kneeCheckCount == 5:
                                    kneeCheckCount += 1
                                    kneeCheckFailed = True
                        if kneeCheckFailed:
                            cv2.putText(image, "Knees went too far forward", (5, 90),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
                        if hipcheckCount < 1:
                            if self.hipCheck(human, image, pos):
                                hipcheckCount += 1
                                hipCheckPassed = True
                        if hipCheckPassed:
                            cv2.putText(image, "Hips went low enough", (5, 70),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                    except Exception as exs:
                        print(exs)
        cv2.imshow('tf-pose-estimation result', image)
        if cv2.waitKey(1) == 27:
            break
    cam.release()
    cv2.destroyAllWindows()
def detect(
        model="mobilenet_thin",  # tf-pose model name
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    w, h = 432, 368
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or \
        source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(project)
    # save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(model, 'module') else model.names  # class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict returns a key report, not the module, so move and
        # switch to eval mode in separate statements
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    breakCond = False
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
            next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # OpenPose: get keypoints and individual crops
        print("\n")
        myImg = im0s.copy()
        keypoints, humans = getKeyPoints(myImg, e, w, h)
        crops = [getCrop(point[0], myImg, 10, device, point[1] / 2)
                 for point in keypoints]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes,
                                   agnostic_nms, max_det=max_det)
        t2 = time_synchronized()

        # Need to adjust bboxes to full image
        if len(pred) > 0:
            breakCond = True

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Check if any overlap between keypoint and det (handheld weapon)
                for detection in det:
                    for crop in crops:
                        if bbox_iou(detection, crop) > 0:
                            cv2.putText(im0, "Spider-Sense Tingling!", (30, 90),
                                        cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5)
                            break

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy, im0, label=label, color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy, imc,
                                         file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg',
                                         BGR=True)

                # write keypoint boxes
                for *xyxy, conf, cls in reversed(crops):
                    plot_one_box(xyxy, imc, label="keyP", color=colors(c, True),
                                 line_thickness=line_thickness)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            im0 = TfPoseEstimator.draw_humans(im0, humans, imgcopy=False)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to " \
            f"{save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
    print(f'Done. ({time.time() - t0:.3f}s)')
def main():
    windowName = "Live Window Feed"
    print(windowName)
    cv2.namedWindow(windowName)
    cap = cv2.VideoCapture(0)
    if cap.isOpened():
        ret, frame = cap.read()
    else:
        ret = False
    # set the servos to 90 to begin
    wrist_current = 7
    elbow_current = 7
    shoulder_current = 7
    while ret:
        ret, frame = cap.read()
        # analyze the image
        w, h = model_wh('432x368')
        if w == 0 or h == 0:
            e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
        else:
            e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))
        # make an estimation on the frame:
        # estimate human poses from a single image
        image = common.read_imgfile(frame, None, None)
        if image is None:
            logger.error('Image can not be read')
            sys.exit(-1)
        t = time.time()
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        elapsed = time.time() - t
        logger.info('inference image in %.4f seconds.' % elapsed)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # collect the relevant keypoints
        if len(humans) == 1:
            keypoints = str(str(str(humans[0]).split('BodyPart:')[1:]).split('-')).split(' score=')
            # parse through the results to get the specific keypoints
            keypoints_list = []
            for k in range(len(keypoints) - 1):
                pnt = keypoints[k][-11:-1]
                pnt = tuple(map(float, pnt.split(', ')))
                keypoints_list.append(pnt)
            # create a keypoints array
            keypts_array = np.array(keypoints_list)
            keypts_array = keypts_array * (image.shape[1], image.shape[0])
            keypts_array = keypts_array.astype(int)
            # collect the necessary inferences
            if len(keypts_array) > 11:
                shoulder = keypts_array[2]
                elbow = keypts_array[3]
                wrist = keypts_array[4]
                hip = keypts_array[8]
                holder = [shoulder, elbow, wrist, hip]
                kp_series.append(holder)
                scores_series.append([scores_list[2], scores_list[3],
                                      scores_list[4], scores_list[8]])
                # T1
                orig_beta = 0
                orig_alpha = 0
                orig_gamma = 0
                # T2
                orig_theta = 0
                orig_omega = 0
                orig_phi = 0
                # get the vectors (the most recent appended entry)
                shoulder_vec = kp_series[-1][0]
                elbow_vec = kp_series[-1][1]
                wrist_vec = kp_series[-1][2]
                hip_vec = kp_series[-1][3]
                # calculate the magnitudes
                # T1: shoulder, elbow, wrist
                se_mag = magnitude(shoulder_vec, elbow_vec)
                sw_mag = magnitude(shoulder_vec, wrist_vec)
                ew_mag = magnitude(elbow_vec, wrist_vec)
                # T2: shoulder, elbow, hip (se was already calculated in T1)
                sh_mag = magnitude(shoulder_vec, hip_vec)
                he_mag = magnitude(hip_vec, elbow_vec)
                # calculate the angles
                results_T1 = angle_T1(se_mag, ew_mag, sw_mag)
                angles_T1.append(results_T1.tolist())
                results_T2 = angle_T2(se_mag, sh_mag, he_mag)
                angles_T2.append(results_T2.tolist())
                # calculate the delta of T1 & T2
                original_angles_T1 = np.array([orig_beta, orig_alpha, orig_gamma])
                T1_frame_delta = results_T1 - original_angles_T1
                # some values were not rounding properly, so round element by element
                T1_delta_rounded = [round(unit, 1) for unit in T1_frame_delta]
                original_angles_T2 = np.array([orig_theta, orig_phi, orig_omega])
                T2_frame_delta = results_T2 - original_angles_T2
                T2_delta_rounded = [round(unit, 1) for unit in T2_frame_delta]
                # replace the original angles with the current angles
                orig_beta, orig_alpha, orig_gamma = results_T1
                orig_theta, orig_phi, orig_omega = results_T2
                # calculate the angle changes and write them to the servos
                wrist_angle = results_T1[2]
                wrist_current = wristAngle(wrist_angle, wrist_current)
                elbow_angle = results_T1[1]
                elbow_current = elbowAngle(elbow_angle, elbow_current)
                shoulder_angle = results_T2[0]
                shoulder_current = shoulderAngle(shoulder_angle, shoulder_current)
        cv2.imshow(windowName, frame)
        if cv2.waitKey(1) == 27:
            break
    cv2.destroyWindow(windowName)
    # Clean things up at the end
    wrist.stop()
    elbow.stop()
    shoulder.stop()
    GPIO.cleanup()
    cap.release()
def proc_vid_folder(folder, model_resolution='656x368', model='cmu',
                    display=False, flip=False):
    # Models: cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # uncomment to disable CUDA
    logger = logging.getLogger('TfPoseEstimator-Video')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
    w, h = model_wh(model_resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

    # Create the target directories if they don't exist
    if not os.path.exists(folder + '/vid_out'):
        os.mkdir(folder + '/vid_out')
    if not os.path.exists(folder + '/pkl_out'):
        os.mkdir(folder + '/pkl_out')

    # Loop over the files in the folder
    for filename in os.listdir(folder):
        if filename.endswith(".mp4") and \
                not os.path.isfile(folder + '/vid_out/' + filename + '_out.avi') and \
                not os.path.isfile(folder + '/vid_out/unvalid/' + filename + '_out.avi'):
            logger.debug('Starting file : ' + filename)
            # capture the video file
            cap = cv2.VideoCapture(folder + '/' + filename)
            # fps of video
            fps = cap.get(5)
            print('FPS = ', fps)
            fps_time = 0
            # Define the codec and create a VideoWriter object; the output is
            # stored in an '_out.avi' file.
            frame_width = int(cap.get(3))
            frame_height = int(cap.get(4))
            print('Resolution = ' + str(frame_width) + 'x' + str(frame_height))
            # fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(folder + '/vid_out/' + filename + '_out.avi',
                                  fourcc, fps, (frame_width, frame_height))
            human_list = []
            if cap.isOpened() is False:
                print("Error opening video stream or file")
            while cap.isOpened():
                ret_val, image = cap.read()
                if ret_val == True:
                    # humans = e.inference(image)
                    if flip:
                        image = cv2.flip(image, 0)
                    humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
                    # humans = e.inference(image, resize_to_default=(w > 0 and h > 0))
                    # image + human
                    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
                    # Write the frame into the output file
                    out.write(image)
                    # Display the image
                    if display:
                        cv2.putText(image,
                                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 255, 0), 2)
                        cv2.imshow('tf-pose-estimation result', image)
                        fps_time = time.time()
                    tstamp = cap.get(0)
                    # store the inference with its timestamp (in seconds)
                    human_list.append([tstamp / 1000, humans])
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:  # Break the loop
                    break
            # save the poses in the pose dir
            pickle.dump(human_list,
                        open(folder + '/pkl_out/' + filename + '_poses.pkl', 'wb'))
            # When everything is done, release the capture
            cap.release()
            cv2.destroyAllWindows()
            logger.debug('finished file : ' + filename)
    return ret_array

# Get joints of the skeleton from the reference image
ref_image = cv2.imread('./images/warrior_reference.jpg')
e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w_cam, h_cam))
humans_ref = e.inference(ref_image, resize_to_default=1, upsample_size=resize_out_ratio)
ref_joint_array = get_tuple_array(humans_ref)

# Save the skeleton image with a black background
black_background = np.zeros(ref_image.shape, dtype=np.uint8)  # uint8 so imwrite behaves
ref_skeleton_img = TfPoseEstimator.draw_humans(black_background, humans_ref, imgcopy=False)
filename_to_write = 'warrior_reference_skeleton.jpg'
print("file", filename_to_write)
cv2.imwrite(filename_to_write, ref_skeleton_img)

# Create the reference skeleton overlay using the black background
overlay = cv2.imread('warrior_reference_skeleton.jpg')
overlayMask = cv2.cvtColor(overlay, cv2.COLOR_BGR2GRAY)
res, overlayMask = cv2.threshold(overlayMask, 10, 1, cv2.THRESH_BINARY_INV)

# Expand the mask from 1-channel to 3-channel
h, w = overlayMask.shape
overlayMask = np.repeat(overlayMask, 3).reshape((h, w, 3))

# Get joints of the skeleton from the test image and save as an image
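# --- A hedged sketch (an assumption; the original snippet is cut off) of how
# a 3-channel inverse mask like overlayMask is typically applied: keep the
# camera frame where the mask is 1 (background) and insert the skeleton
# overlay where it is 0.
import cv2
import numpy as np


def apply_skeleton_overlay(frame, overlay, overlayMask):
    # frame, overlay: HxWx3 uint8; overlayMask: HxWx3 with 1 on the
    # background and 0 on the skeleton (from THRESH_BINARY_INV with maxval 1)
    frame = cv2.resize(frame, (overlay.shape[1], overlay.shape[0]))
    return (frame * overlayMask + overlay * (1 - overlayMask)).astype(np.uint8)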
def uploadResult(video_name):
    # videosFound = [video for video in videos if video["name"] == video_name]
    bucket_name = 'tesis-resources'
    # if (len(videosFound) > 0):
    try:
        s3.download_file(bucket_name, video_name, './videos_API' + "/" + video_name)
    except:
        return jsonify({"message": "Video not found"})
    cap = cv2.VideoCapture('./videos_API/' + video_name)  # the video goes here

    results = []
    # iterators
    i = 0
    ci = 0

    # file
    archi = open("temp.txt", "w")
    archi.write("dse1_x dse1_y deh1_x deh1_y se1_x se1_y se1_z eh1_x eh1_y eh1_z "
                "dse2_x dse2_y deh2_x deh2_y se2_x se2_y se2_z eh2_x eh2_y eh2_z")
    archi.write('\n')

    # dictionaries
    dir2 = {"0": 0, "1bl": 1, "1l": 2, "1fl": 3, "1f": 4, "1fr": 5, "1r": 6,
            "1br": 7, "2bl": 8, "2l": 9, "2fl": 10, "2f": 11, "2fr": 12,
            "2r": 13, "3bl": 14, "3l": 15, "3fl": 16, "3f": 17, "3fr": 18,
            "3r": 19, "3b": 20, "4": 21}
    dir3 = []
    for key in dir2:
        dir3.append(key)

    # models
    json_file = open('./neural_networks2/model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model1 = model_from_json(loaded_model_json)
    model1.load_weights("./neural_networks2/model.h5")
    # print(model1.predict_classes([[[0, 0, -1]]])[0])
    json_file = open('./neural_networks2/model2.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model2 = model_from_json(loaded_model_json)
    model2.load_weights("./neural_networks2/model2.h5")

    # iterate over the video to obtain the frames
    if cap.isOpened() is False:
        print("Error opening video stream or file")
    while cap.isOpened():
        ret_val, image = cap.read()
        if not ret_val:
            print("finished")
            break
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=4.0)
        if not True:  # never executes; background blanking is disabled
            image = np.zeros(image.shape)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        poseLifting = Prob3dPose('./tf_pose/lifting/models/prob_model_params.mat')
        image_h, image_w = image.shape[:2]
        standard_w = 640
        standard_h = 480
        pose_2d_mpiis = []
        visibilities = []
        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)
        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis,
                                                                   visibilities)
        pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)
        i += 1
        print(i)
        write_vector_data(pose_3d, archi, humans)
    archi.close()

    # read the temp file
    df = pd.read_csv(r"./temp.txt", sep=' ')

    # predict with the neural network
    tempArrR = []
    posesR = []
    tempArrL = []
    posesL = []
    for i in df.index:
        # right
        rse = [df["dse1_x"][i], df["dse1_y"][i], df["se1_x"][i],
               df["se1_y"][i], df["se1_z"][i]]
        right_se = model1.predict_classes([[rse]])
        reh = [df["deh1_x"][i], df["deh1_y"][i], df["eh1_x"][i],
               df["eh1_y"][i], df["eh1_z"][i], right_se]
        right_eh = model2.predict_classes([[reh]])
        try:
            predictionR = dir3[right_se[0]] + "_" + dir3[right_eh[0]]
        except:
            print("error loading into dir3 " + str(right_se[0]) + " " + str(right_eh[0]))
        tempArrR.append(predictionR)
        # left
        lse = [df["dse2_x"][i], df["dse2_y"][i], df["se2_x"][i] * -1,
               df["se2_y"][i], df["se2_z"][i]]
        left_se = model1.predict_classes([[lse]])
        leh = [df["deh2_x"][i], df["deh2_y"][i], df["eh2_x"][i] * -1,
               df["eh2_y"][i], df["eh2_z"][i], left_se]
        left_eh = model2.predict_classes([[leh]])
        predictionL = dir3[left_se[0]] + "_" + dir3[left_eh[0]]
        tempArrL.append(predictionL)
        if (i / 10).is_integer() and i != 0:
            modaR = moda(tempArrR)
            posesR.append(modaR)
            tempArrR = []
            modaL = moda(tempArrL)
            posesL.append(modaL)
            tempArrL = []
    try:
        modaR = moda(tempArrR)
        posesR.append(modaR)
        modaL = moda(tempArrL)
        posesL.append(modaL)
    except:
        print("problem")

    # write the new file
    S = "vid"  # replace this with the video name
    archi2 = open("resultado.txt", "w")
    for it in posesR:
        vector = transformKeyToPoint(it)
        line = str(vector[0]) + "," + str(vector[1]) + "," + str(vector[2])
        archi2.write(line)
        archi2.write('\n')
    archi2.write('a')
    for it in posesL:
        vector = transformKeyToPoint(it)
        line = str(vector[0]) + "," + str(vector[1]) + "," + str(vector[2])
        archi2.write('\n')
        archi2.write(line)
    archi2.close()
    new_video_name = video_name.split(sep='.')
    response = s3.upload_file(r'./resultado.txt', bucket_name,
                              'R_' + new_video_name[0] + ".txt")
    return jsonify({"message": "Complete"})
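# --- A hedged sketch of the moda() helper used above (an assumption: "moda"
# is Spanish for "mode", and the usage is consistent with returning the most
# frequent element of a list).
from collections import Counter


def moda(values):
    return Counter(values).most_common(1)[0][0]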