def plot_results(detector, pose, img, category, logging=True):
    """Draw detection boxes, labels and person poses onto ``img``.

    For every object reported by ``detector`` a bounding box and its category
    label are drawn. For objects whose category index is 0 (person), the
    region returned by ``keep_aspect`` is cropped from a BGR copy of the
    image, passed to ``compute`` and the result is drawn with
    ``display_result``.

    Args:
        detector: Object exposing ``get_object_count()`` and
            ``get_object(idx)``. Each returned object provides ``category``,
            ``prob``, ``x``, ``y``, ``w`` and ``h``; the box fields are
            multiplied by the image size, i.e. treated as fractions of it.
        pose: Pose model handle, forwarded to ``keep_aspect`` and ``compute``.
        img: Image array that is drawn on in place. It is converted with
            ``cv2.COLOR_BGRA2BGR`` before cropping, so it is presumably a
            4-channel BGRA image -- TODO confirm with callers.
        category: Sequence of label strings indexed by ``obj.category``.
        logging: When true, a summary of every detection is written out.
            (The name shadows the stdlib ``logging`` module inside this
            function; it is kept for backward compatibility.)

    Returns:
        The same ``img`` object, after drawing.
    """
    CATEGORY_PERSON = 0

    pose_img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    h, w = img.shape[0], img.shape[1]
    # Label size follows the image width; identical for every detection.
    font_scale = w / 512.0

    count = detector.get_object_count()
    # NOTE(review): the name of the output call was not recoverable from the
    # reviewed source; ``print`` is used here -- switch to the project's
    # logger if that is what the rest of the file uses.
    if logging:
        print(f'object_count={count}')

    for idx in range(count):
        obj = detector.get_object(idx)
        label = category[obj.category]

        # print result
        if logging:
            print(f'+ idx={idx}')
            print(f' category={obj.category}[ {label} ]')
            print(f' prob={obj.prob}')
            print(f' x={obj.x}')
            print(f' y={obj.y}')
            print(f' w={obj.w}')
            print(f' h={obj.h}')

        # Scale the relative box to pixel coordinates.
        top_left = (int(w * obj.x), int(h * obj.y))
        bottom_right = (int(w * (obj.x + obj.w)), int(h * (obj.y + obj.h)))
        # Label sits just inside the bottom-left corner of the box.
        text_position = (int(w * obj.x) + 4, int(h * (obj.y + obj.h) - 8))

        # update image
        # Hue is spread over the categories so each one gets its own colour.
        color = hsv_to_rgb(256 * obj.category / len(category), 255, 255)
        cv2.rectangle(img, top_left, bottom_right, color, 4)
        cv2.putText(
            img,
            label,
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            font_scale,
            color,
            1,
        )

        if obj.category != CATEGORY_PERSON:
            continue

        # pose detection
        px1, py1, px2, py2 = keep_aspect(
            top_left, bottom_right, pose_img, pose)
        crop_img = pose_img[py1:py2, px1:px2, :]

        # Offset and scale of the crop relative to the full image, so that
        # ``compute`` can report positions in full-image terms.
        offset_x = px1 / w
        offset_y = py1 / h
        scale_x = crop_img.shape[1] / w
        scale_y = crop_img.shape[0] / h
        detections = compute(
            pose, crop_img, offset_x, offset_y, scale_x, scale_y)

        # Thin rectangle marks the region actually fed to the pose model.
        cv2.rectangle(img, (px1, py1), (px2, py2), color, 1)
        display_result(img, detections)

    return img
def pose_estimation(detector, pose, img):
    """Run pose estimation on every person reported by ``detector``.

    Args:
        detector: Object exposing ``get_object_count()`` and
            ``get_object(idx)``; each object provides ``category`` and the
            box fields ``x``, ``y``, ``w``, ``h``, which are multiplied by
            the image size to obtain pixel coordinates.
        pose: Pose model handle, forwarded to ``keep_aspect`` and ``compute``.
        img: Image array; converted with ``cv2.COLOR_BGRA2BGR`` before
            cropping (presumably 4-channel BGRA -- TODO confirm).

    Returns:
        A list with one entry per detected object, in detector order: the
        value returned by ``compute`` for category 0 (person) objects and
        ``None`` for every other category.
    """
    person_category = 0

    bgr_image = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    img_h, img_w = img.shape[:2]

    results = []
    for obj_index in range(detector.get_object_count()):
        det = detector.get_object(obj_index)

        # Relative box -> pixel corners.
        corner_min = (int(img_w * det.x), int(img_h * det.y))
        corner_max = (
            int(img_w * (det.x + det.w)),
            int(img_h * (det.y + det.h)),
        )

        if det.category == person_category:
            left, top, right, bottom = keep_aspect(
                corner_min, corner_max, bgr_image, pose)
            patch = bgr_image[top:bottom, left:right, :]
            # Crop placement and size expressed relative to the full image.
            results.append(
                compute(
                    pose,
                    patch,
                    left / img_w,
                    top / img_h,
                    patch.shape[1] / img_w,
                    patch.shape[0] / img_h,
                )
            )
        else:
            # Keep the output aligned with the detector's object indices.
            results.append(None)

    return results
def action_recognition(box, input_image, pose, detector, model, data):
    """Classify the action of the person inside ``box``.

    The person's keypoints are obtained, converted with
    ``ailia_to_openpose`` and ``pose_postprocess``, and pushed into the
    rolling history ``data`` (modified in place). Once no frame of the
    history sums to zero, the history is fed to ``model``.

    Args:
        box: Indexable as ``box[0]..box[3]``; used as the (x1, y1, x2, y2)
            corners of the tracked person.
        input_image: Image array; only ``shape[0]``/``shape[1]`` and slicing
            are used here.
        pose: Pose model handle.
        detector: Detector handle (only used when ``args.arch`` is not
            ``"lw_human_pose"``).
        model: Action model exposing ``set_input_shape`` and ``predict``.
        data: Keypoint history array, indexed as
            ``(keypoint, time step, component)``; updated in place.

    Returns:
        A ``(text, person)`` tuple. ``text`` is ``"-"`` when no pose could be
        matched (``person`` is then ``None``) or while the history still
        holds a frame whose values sum to zero; otherwise it is the best
        label followed by its probability truncated to two decimals.
    """
    img_h, img_w = input_image.shape[0], input_image.shape[1]

    if args.arch == "lw_human_pose":
        bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
            pose, img_h, img_w, get_all=True)

        # Pick the pose whose top-left corner lies closest to the box's.
        nearest_idx = -1
        nearest_dist = 32768
        for cand in range(pose.get_object_count()):
            corners = xywh_to_xyxy(bbox_xywh[cand], img_h, img_w)
            dist = math.sqrt(
                (corners[0] - box[0])**2 + (corners[1] - box[1])**2)
            if dist < nearest_dist:
                nearest_dist, nearest_idx = dist, cand

        if nearest_idx == -1:
            return "-", None
        person = pose.get_object_pose(nearest_idx)
    else:
        # NOTE(review): the values returned here are not used in this
        # branch; the call is kept as in the original.
        bbox_xywh, cls_conf, cls_ids = get_detector_result(
            detector, img_h, img_w)

        left, top, right, bottom = keep_aspect(
            (box[0], box[1]), (box[2], box[3]), input_image, pose)
        patch = input_image[top:bottom, left:right, :]
        # Crop placement and size expressed relative to the full image.
        person = compute(
            pose,
            patch,
            left / img_w,
            top / img_h,
            patch.shape[1] / img_w,
            patch.shape[0] / img_h,
        )

    keypoints = ailia_to_openpose(person)
    frame = pose_postprocess(np.expand_dims(keypoints, axis=1))

    # Slide the history one step towards the past, then store the new frame
    # in the last slot.
    # data: (ailia.POSE_KEYPOINT_CNT,TIME_RANGE,3)
    data[:, :TIME_RANGE - 1, :] = data[:, 1:TIME_RANGE, :]
    data[:, TIME_RANGE - 1, :] = frame[:, 0, :]

    # Do not classify while any time step still sums to zero.
    if any(np.sum(data[:, step, :]) == 0 for step in range(TIME_RANGE)):
        return "-", person

    # Reorder to (component, time, keypoint) and keep the first two
    # components.
    # May need to be removed if input for action model changed
    model_input = data.transpose((2, 1, 0))[:2, ...]
    # Add a leading and a trailing singleton axis.
    model_input = model_input[np.newaxis, ..., np.newaxis]

    model.set_input_shape(model_input.shape)
    scores = softmax(model.predict(model_input))[0]

    # Arg-max over the labels; on ties the later label wins.
    best_idx, best_prob = 0, 0
    for label_idx in range(len(LABELS)):
        candidate = scores[label_idx]
        if candidate >= best_prob:
            best_prob, best_idx = candidate, label_idx

    truncated = int(best_prob * 100) / 100
    return LABELS[best_idx] + " " + str(truncated), person