Example #1
def plot_results(detector, pose, img, category, logging=True):
    # keep a BGR copy for pose estimation; boxes and labels are drawn on the original image
    pose_img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    h, w = img.shape[0], img.shape[1]
    count = detector.get_object_count()
    if logging:
        logger.info(f'object_count={count}')

    for idx in range(count):
        obj = detector.get_object(idx)
        # print result
        if logging:
            logger.info(f'+ idx={idx}')
            logger.info(
                f'  category={obj.category}[ {category[obj.category]} ]')
            logger.info(f'  prob={obj.prob}')
            logger.info(f'  x={obj.x}')
            logger.info(f'  y={obj.y}')
            logger.info(f'  w={obj.w}')
            logger.info(f'  h={obj.h}')
        # the detector returns normalized (x, y, w, h); convert to pixel coordinates
        top_left = (int(w * obj.x), int(h * obj.y))
        bottom_right = (int(w * (obj.x + obj.w)), int(h * (obj.y + obj.h)))
        text_position = (int(w * obj.x) + 4, int(h * (obj.y + obj.h) - 8))

        # update image
        color = hsv_to_rgb(256 * obj.category / len(category), 255, 255)
        fontScale = w / 512.0
        cv2.rectangle(img, top_left, bottom_right, color, 4)

        cv2.putText(img, category[obj.category], text_position,
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale, color, 1)

        # pose estimation below is only run on person detections
        CATEGORY_PERSON = 0
        if obj.category != CATEGORY_PERSON:
            continue

        # pose detection
        px1, py1, px2, py2 = keep_aspect(top_left, bottom_right, pose_img,
                                         pose)

        crop_img = pose_img[py1:py2, px1:px2, :]
        # express the crop's position and size as fractions of the full image
        offset_x = px1 / img.shape[1]
        offset_y = py1 / img.shape[0]
        scale_x = crop_img.shape[1] / img.shape[1]
        scale_y = crop_img.shape[0] / img.shape[0]
        detections = compute(pose, crop_img, offset_x, offset_y, scale_x,
                             scale_y)

        cv2.rectangle(img, (px1, py1), (px2, py2), color, 1)

        display_result(img, detections)

    return img
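
A minimal usage sketch for plot_results, assuming detector and pose are already-initialized ailia detector / pose-estimator objects and COCO_CATEGORY is the label list matching the detector; the file names, thresholds, and the detector.compute(img, threshold, iou) call follow the usual ailia sample pattern and are assumptions here, not part of the snippet above.

raw = cv2.imread('input.jpg')                    # any BGR image (file name is illustrative)
img = cv2.cvtColor(raw, cv2.COLOR_BGR2BGRA)      # plot_results expects a BGRA image
detector.compute(img, 0.4, 0.45)                 # assumed ailia-style call: (image, threshold, iou)
res_img = plot_results(detector, pose, img, COCO_CATEGORY)
cv2.imwrite('output.png', res_img)
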
Example #2
def pose_estimation(detector, pose, img):
    pose_img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    h, w = img.shape[0], img.shape[1]
    count = detector.get_object_count()
    pose_detections = []
    for idx in range(count):
        obj = detector.get_object(idx)
        top_left = (int(w * obj.x), int(h * obj.y))
        bottom_right = (int(w * (obj.x + obj.w)), int(h * (obj.y + obj.h)))
        # non-person detections get a None placeholder so the indices of
        # pose_detections stay aligned with the detector's object indices
        CATEGORY_PERSON = 0
        if obj.category != CATEGORY_PERSON:
            pose_detections.append(None)
            continue
        px1, py1, px2, py2 = keep_aspect(top_left, bottom_right, pose_img,
                                         pose)
        crop_img = pose_img[py1:py2, px1:px2, :]
        offset_x = px1 / img.shape[1]
        offset_y = py1 / img.shape[0]
        scale_x = crop_img.shape[1] / img.shape[1]
        scale_y = crop_img.shape[0] / img.shape[0]
        detections = compute(pose, crop_img, offset_x, offset_y, scale_x,
                             scale_y)
        pose_detections.append(detections)
    return pose_detections
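
A short sketch of how the returned list lines up with the detector's objects: entry i corresponds to detector.get_object(i), and None marks a non-person detection. display_result is the same drawing helper used in Example #1.

pose_detections = pose_estimation(detector, pose, img)
for i, detections in enumerate(pose_detections):
    if detections is None:               # detection i was not a person
        continue
    display_result(img, detections)      # draw the keypoints for person i
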
def action_recognition(box, input_image, pose, detector, model, data):
    if args.arch == "lw_human_pose":
        # lightweight-human-pose already estimated poses for the whole frame;
        # pick the pose whose bounding box is closest to the tracked box
        bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
            pose, input_image.shape[0], input_image.shape[1], get_all=True)

        idx = -1
        min_d = 32768

        for i in range(pose.get_object_count()):
            target = xywh_to_xyxy(bbox_xywh[i], input_image.shape[0],
                                  input_image.shape[1])
            d = math.sqrt((target[0] - box[0])**2 + (target[1] - box[1])**2)
            if d < min_d:
                min_d = d
                idx = i

        if idx == -1:
            return "-", None

        person = pose.get_object_pose(idx)
    else:
        # other architectures: run pose estimation on a crop of the tracked box
        bbox_xywh, cls_conf, cls_ids = get_detector_result(
            detector, input_image.shape[0], input_image.shape[1])
        px1, py1, px2, py2 = keep_aspect((box[0], box[1]), (box[2], box[3]),
                                         input_image, pose)
        crop_img = input_image[py1:py2, px1:px2, :]
        offset_x = px1 / input_image.shape[1]
        offset_y = py1 / input_image.shape[0]
        scale_x = crop_img.shape[1] / input_image.shape[1]
        scale_y = crop_img.shape[0] / input_image.shape[0]
        detections = compute(pose, crop_img, offset_x, offset_y, scale_x,
                             scale_y)
        person = detections

    # convert the keypoints to OpenPose ordering before feeding the action model
    openpose_keypoints = ailia_to_openpose(person)
    frame = np.expand_dims(openpose_keypoints, axis=1)
    frame = pose_postprocess(frame)

    # data: (ailia.POSE_KEYPOINT_CNT, TIME_RANGE, 3)
    # shift the time window one frame to the left and append the newest keypoints
    for i in range(TIME_RANGE - 1):
        data[:, i, :] = data[:, i + 1, :]

    data[:, TIME_RANGE - 1, :] = frame[:, 0, :]

    # require a full window of non-zero poses before classifying
    zero_cnt = 0
    for i in range(TIME_RANGE):
        if np.sum(data[:, i, :]) == 0:
            zero_cnt = zero_cnt + 1

    if zero_cnt >= 1:
        return "-", person

    data_rgb = data.transpose((2, 1, 0))
    data_rgb = data_rgb[:2, ...]  # may need to be removed if the action model's input changes

    data_rgb = np.expand_dims(data_rgb, axis=3)
    data_rgb.shape = (1, ) + data_rgb.shape

    model.set_input_shape(data_rgb.shape)
    action = model.predict(data_rgb)

    action = softmax(action)
    # pick the highest-probability class
    max_prob = 0
    class_idx = 0
    for i in range(len(LABELS)):
        if max_prob <= action[0][i]:
            max_prob = action[0][i]
            class_idx = i

    return LABELS[class_idx] + " " + str(int(max_prob * 100) / 100), person
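
A hedged calling sketch for action_recognition, assuming data is the rolling keypoint buffer described by the in-code comment (shape (ailia.POSE_KEYPOINT_CNT, TIME_RANGE, 3)), box is a tracked person's bounding box in pixel coordinates, and model is an already-loaded action-classification network; next_frame() and tracked_box() are hypothetical stand-ins for the surrounding video/tracking loop.

data = np.zeros((ailia.POSE_KEYPOINT_CNT, TIME_RANGE, 3))   # one rolling window per tracked person
while True:
    frame = next_frame()       # hypothetical: BGR frame from the video source
    box = tracked_box()        # hypothetical: (x1, y1, x2, y2) of the tracked person
    label, person = action_recognition(box, frame, pose, detector, model, data)
    print(label)               # "-" until the window is full or no pose matches, then "<label> <prob>"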