Example #1
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu()):
    x, scaled_img = gcv.data.transforms.presets.yolo.transform_test(
        img, short=480, max_size=1024)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    pose_input, upscale_bbox = detector_to_simple_pose(scaled_img,
                                                       class_IDs,
                                                       scores,
                                                       bounding_boxs,
                                                       output_shape=(128, 96),
                                                       ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        scale = 1.0 * img.shape[0] / scaled_img.shape[0]
        img = cv_plot_keypoints(img.asnumpy(),
                                pred_coords,
                                confidence,
                                class_IDs,
                                bounding_boxs,
                                scores,
                                box_thresh=1,
                                keypoint_thresh=0.3,
                                scale=scale)
    return img
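A minimal driver sketch for this helper. The model names are assumptions taken from the zoo names used elsewhere in these examples, and 'input.jpg' is a placeholder path:

import mxnet as mx
from gluoncv import model_zoo

ctx = mx.cpu()
detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(['person'], reuse_weights=['person'])  # keep only the person class
pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True, ctx=ctx)

img = mx.image.imread('input.jpg')  # placeholder path; transform_test expects an NDArray
result = keypoint_detection(img, detector, pose_net, ctx=ctx)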
Example #2
def detect_2d_joints(frame, short=360):
    """
    Args:
        short: 较短边resize大小
        frame: 任意尺寸的RGB图像

    Returns: 处理过的图像(ndarray),关节点坐标(NDArray)以及置信度等显示2d姿势相关的要素
    """
    # 缩放图像和生成目标检测器输入张量
    frame = nd.array(frame)
    x, img = data.transforms.presets.yolo.transform_test(frame, short=short)
    # print(x.shape, img.shape)
    # Detect persons
    class_ids, scores, bounding_boxes = detector(x)
    # Build the input tensor for the pose network
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_ids, scores,
                                                       bounding_boxes)
    # Predict joint keypoints
    predict_heatmap = pose_net(pose_input)
    predict_coords, confidence = heatmap_to_coord(predict_heatmap,
                                                  upscale_bbox)

    # Display the 2D pose
    # ax = utils.viz.plot_keypoints(img, predict_coords, confidence, class_ids, bounding_boxes, scores)

    return {
        'img': img,
        'coords': predict_coords,
        'confidence': confidence,
        'class_ids': class_ids,
        'bboxes': bounding_boxes,
        'scores': scores
    }
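A usage sketch, assuming detector and pose_net are module-level models as in the other examples: detect_2d_joints expects an RGB frame, so a BGR frame read with OpenCV must be converted first ('frame.jpg' is a placeholder path):

import cv2

frame_bgr = cv2.imread('frame.jpg')
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
out = detect_2d_joints(frame_rgb, short=360)
print(out['coords'].shape, out['confidence'].shape)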
Example #3
def predict_fn(input_object, model):

    try:
        if os.environ['USE_EIA'] == "1":
            device = mx.eia()
            img, cid, scores, bbox = copy_to_device(input_object, device)
        elif os.environ['USE_GPU'] == "1":
            device = mx.gpu()
            img, cid, scores, bbox = copy_to_device(input_object, device)
        else:
            device = mx.cpu()
            img, cid, scores, bbox = input_object
    except Exception:
        device = mx.cpu()
        img, cid, scores, bbox = input_object
        logger.error("Failed to load data into desired context")

    pose_input, upscale_bbox = detector_to_simple_pose(img, cid, scores, bbox)
    predicted_heatmap = model(pose_input.as_in_context(device))
    keypoints, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    c = cid[0].asnumpy().reshape(-1)
    s = scores[0].asnumpy().reshape(-1)
    bb = bbox[0].asnumpy().reshape(-1)

    kp = keypoints.asnumpy().reshape(-1)
    cfd = confidence.asnumpy().reshape(-1)

    return np.concatenate((c, s, bb, kp, cfd))
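Because predict_fn flattens everything into one vector, the client has to split it again. A sketch of that inverse step, where num_dets (padded detector outputs), num_persons (boxes kept for pose) and the 17 COCO joints are all assumptions mirroring the concatenation order above:

def unpack_prediction(flat, num_dets, num_persons, num_joints=17):
    # flat: the numpy vector returned by predict_fn; all counts are assumptions
    c = flat[:num_dets]                                        # class IDs
    s = flat[num_dets:2 * num_dets]                            # detection scores
    bb = flat[2 * num_dets:6 * num_dets].reshape(num_dets, 4)  # boxes
    kp_end = 6 * num_dets + num_persons * num_joints * 2
    kp = flat[6 * num_dets:kp_end].reshape(num_persons, num_joints, 2)
    cfd = flat[kp_end:].reshape(num_persons, num_joints, 1)
    return c, s, bb, kp, cfd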
Example #4
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img,
                                                             short=512,
                                                             max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img,
                                                       class_IDs,
                                                       scores,
                                                       bounding_boxs,
                                                       ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        axes = plot_keypoints(img,
                              pred_coords,
                              confidence,
                              class_IDs,
                              bounding_boxs,
                              scores,
                              box_thresh=0.5,
                              keypoint_thresh=0.2,
                              ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)
        plt.draw()
        plt.pause(0.001)

    return axes
Example #5
    def process_pose_frame(np_frame, resolution):
        width, height = resolution
        if np_frame is None:
            return mxnet.nd.zeros((height, width, 3), ctx=gpu)

        frame = mxnet.nd.array(np_frame, ctx=gpu)
        x, img = data.transforms.presets.yolo.transform_test(frame, short=512)

        class_IDs, scores, bounding_boxs = detector(x)

        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)

        if pose_input is None:
            return

        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
        img = cv_plot_keypoints(img,
                                pred_coords,
                                confidence,
                                class_IDs,
                                bounding_boxs,
                                scores,
                                box_thresh=0.5,
                                keypoint_thresh=0.2,
                                scale=1.0)
        print(img.size)
        # for j in range(len(pred_coords)):
        # 	for i in range(len(pred_coords[0])):
        # 		x, y = pred_coords[j][i].astype(int).asnumpy()
        # 		cv2.circle(img, (x,y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        return img
Example #6
def main():
    args = parse_args()
    network = None

    scale = 1.0

    detector = get_model('yolo_darknet53_coco', pretrained=True)
    detector.reset_class(['person'], reuse_weights=['person'])

    if args.type == 'ONNX':
        network = cv2.dnn.readNetFromONNX(args.model)

    elif args.type == 'OpenVINO':
        network = cv2.dnn.readNetFromModelOptimizer(args.xml, args.model)

    # default backend if none was specified
    if not args.backend:
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)

    # to use the CUDA backend in OpenCV, make sure OpenCV was built with CUDA support
    elif args.backend == 'CUDA':
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        network.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    # to use an OpenVINO model, make sure the Inference Engine is installed and OpenCV was built with IE support
    elif args.backend == 'INFERENCE':
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_INFERENCE_ENGINE)
        network.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    x, img = data.transforms.presets.yolo.load_test(args.img, short=512)
    class_IDs, scores, bounding_boxes = detector(x)

    pose_input, upscaled_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxes)

    pose_input = pose_input.asnumpy()
    bs = []
    for i in range(pose_input.shape[0]):
        blob = cv2.dnn.blobFromImage(
            np.transpose(np.squeeze(pose_input[i, :, :, :]), (1, 2, 0)), scale,
            (args.width, args.height), (0, 0, 0), False)
        network.setInput(blob)
        temp = network.forward()
        bs.append(temp)

    output = np.concatenate(bs, axis=0)

    output = mx.nd.array(output)
    pred_coords, confidence = heatmap_to_coord(output, upscaled_bbox)

    ax = plot_keypoints(img,
                        pred_coords,
                        confidence,
                        class_IDs,
                        bounding_boxes,
                        scores,
                        box_thresh=0.5,
                        keypoint_thresh=0.2)
    plt.show()
Example #7
def get_full_frame_info(a_frame):
    x, frame = transform_test(a_frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs)
    pred_coords, confidence = None, None
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        pred_coords = pred_coords.asnumpy()
    return class_IDs, scores, upscale_bbox, pred_coords, confidence, bounding_boxs
Example #8
def predict(img_path):
    # 1. Detect keypoints and display them
    # Pre-process the input image and detect persons
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    # print("Shape of pre-processed image:", x.shape)

    start = time.time()

    # detect persons and bbox
    class_ids, scores, bounding_boxes = detector(x)

    # 2. Pre-process the detector outputs as input for the pose network
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_ids, scores, bounding_boxes)

    global detector_time
    detector_time += (time.time() - start)

    print("detector cost time: {:.3f} seconds".format(time.time() - start))
    prepare_end = time.time()

    # 3. Predict keypoints
    if pose_input is None:
        return None, None
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    global predictor_2d_time
    predictor_2d_time += (time.time() - prepare_end)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() - prepare_end))

    # 4. Display the 2D pose
    # utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes, scores, box_thresh=0.5,
    #                          keypoint_thresh=0.2)

    # 5. Normalize the coordinates
    prepare_end = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(), w=img.shape[1], h=img.shape[0])

    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # 6. Create a generator as input for the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad, causal_shift=causal_shift, augment=False)

    # 7. 3D pose estimation and display
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global full_time, predictor_3d_time
    predictor_3d_time += time.time() - prepare_end
    full_time += time.time() - start
    print("3d predictor time: {:.3f} seconds".format(time.time() - prepare_end))

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    return prediction, img
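normalize_screen_coordinates is not shown here; in VideoPose3D-style pipelines it maps x from [0, w] to [-1, 1] while scaling y by the same factor to preserve aspect ratio. A sketch of that convention (an assumption, since the helper's body is not part of this example):

import numpy as np

def normalize_screen_coordinates(X, w, h):
    # X: (..., 2) pixel coordinates; maps [0, w] -> [-1, 1] on x and applies
    # the same scale to y (VideoPose3D convention, assumed here)
    assert X.shape[-1] == 2
    return X / w * 2 - np.array([1, h / w])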
Example #9
def keypoint_detection(img_path, detector, pose_net):
    x, img = data.transforms.presets.yolo.load_test(img_path, short=512)
    class_IDs, scores, bounding_boxs = detector(x)

    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    ax = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                        box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
Example #10
def keypoint_detection(img_path, detector, pose_net):
    x, img = data.transforms.presets.yolo.load_test(img_path, short=512)
    class_IDs, scores, bounding_boxs = detector(x)

    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    ax = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                        box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
Example #11
def getPose(im_fname):
    x, img = data.transforms.presets.ssd.load_test(im_fname, short=512)
    #print('Shape of pre-processed image:', x.shape)

    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores,
                                                       bounding_boxs)

    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    return img, pred_coords, confidence, class_IDs, bounding_boxs, scores, upscale_bbox
Example #12
def keypoint_detection(i,
                       frame,
                       imagepath,
                       detector,
                       pose_net,
                       ctx=mx.cpu(),
                       axes=None):

    global pause_time

    x, img = gcv.data.transforms.presets.yolo.transform_test(frame,
                                                             short=512,
                                                             max_size=1024)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img,
                                                       class_IDs,
                                                       scores,
                                                       bounding_boxs,
                                                       output_shape=(1024,
                                                                     768),
                                                       ctx=ctx)

    #print(pose_input,"\n")
    if len(upscale_bbox) > 0:

        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        hackathon_action(i, frame, imagepath, pred_coords, confidence,
                         class_IDs, bounding_boxs, scores)

        axes = plot_keypoints(img,
                              pred_coords,
                              confidence,
                              class_IDs,
                              bounding_boxs,
                              scores,
                              box_thresh=0.5,
                              keypoint_thresh=0.2,
                              ax=axes)
        plt.draw()
        plt.pause(pause_time)
        #plt.pause(1.0)
    else:
        axes = plot_image(frame, ax=axes)
        plt.draw()
        plt.pause(pause_time)

    return axes
Example #13
File: net.py  Project: Rainweic/Hi-Dancer
def detection(net, image, use_gpu):
    '''
    Run prediction:

    input:
        net (dict):      the models ({'detector': ..., 'pose_net': ...})
        image (numpy):   the image
        use_gpu (bool):  whether to use the GPU
    return:
        pred (dict):     dictionary with the prediction results
                         (None if no person was detected)
        img (numpy):     the image
    '''
    if use_gpu:
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()
    img_adarry = nd.array(image)
    x, img = transform_test(img_adarry,
                            short=512,
                            max_size=1024,
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225))
    if use_gpu:
        # Move to the GPU
        x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = net['detector'](x)
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores,
                                                       bounding_boxs)

    if len(upscale_bbox) == 0:
        # No person detected in the image
        return None, img

    if use_gpu:
        # Move to the GPU
        pose_input = pose_input.as_in_context(ctx)
    predicted_heatmap = net['pose_net'](pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    if use_gpu:
        # Move to the GPU
        pred_coords = pred_coords.as_in_context(ctx)

    pred = {
        'class_IDs': class_IDs,
        'scores': scores,
        'bounding_boxs': bounding_boxs,
        'pred_coords': pred_coords,
        'confidence': confidence
    }
    return pred, img
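A sketch of how the net dict this function expects might be assembled; the zoo model names are assumptions matching the other examples here, and 'dance.jpg' is a placeholder path:

from gluoncv import model_zoo
import cv2

net = {
    'detector': model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True),
    'pose_net': model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True),
}
net['detector'].reset_class(['person'], reuse_weights=['person'])

image = cv2.cvtColor(cv2.imread('dance.jpg'), cv2.COLOR_BGR2RGB)  # RGB numpy array
pred, img = detection(net, image, use_gpu=False)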
Example #14
def get_pose_estimation(img_object,
                        detector_model="yolo3_mobilenet1.0_coco",
                        pose_model="simple_pose_resnet18_v1b",
                        box_thresh=0.5,
                        keypoint_thresh=0.2):
    '''
    Detect persons in an image and plot their estimated poses.
    '''
    detector = model_zoo.get_model(detector_model, pretrained=True)

    pose_net = model_zoo.get_model(pose_model, pretrained=True)

    # Loading weights for only person class
    detector.reset_class(["person"], reuse_weights=['person'])

    try:
        img_object = utils.download(img_object)
    except ValueError:
        pass

    if "yolo" in detector_model:
        x, img = data.transforms.presets.yolo.load_test(img_object, short=512)
    elif "ssd" in detector_model:
        x, img = data.transforms.presets.ssd.load_test(img_object, short=512)

    class_IDs, scores, bounding_boxs = detector(x)

    if "simple_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
    elif "alpha_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_alpha_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(
            predicted_heatmap, upscale_bbox)

    ax = utils.viz.plot_keypoints(img,
                                  pred_coords,
                                  confidence,
                                  class_IDs,
                                  bounding_boxs,
                                  scores,
                                  box_thresh=box_thresh,
                                  keypoint_thresh=keypoint_thresh)

    return ax
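A hedged call example: img_object may be a local path or a URL (utils.download is tried first), and 'soccer.png' is only a placeholder:

import matplotlib.pyplot as plt

ax = get_pose_estimation('soccer.png',
                         detector_model='yolo3_mobilenet1.0_coco',
                         pose_model='simple_pose_resnet18_v1b')
plt.show()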
Example #15
    def Infer(self,
              img_path,
              output_path="result.jpg",
              bbox_thresh=0.5,
              kp_thresh=0.2):

        x, img = data.transforms.presets.ssd.load_test(img_path, short=512)
        x = x.copyto(self.system_dict["local"]["ctx"][0])
        print('Shape of pre-processed image:', x.shape)

        print('Running Person Detector')
        class_IDs, scores, bounding_boxs = self.system_dict["local"][
            "detector"](x)

        print('Running Pose Estimator')
        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)

        pose_input = pose_input.copyto(self.system_dict["local"]["ctx"][0])
        predicted_heatmap = self.system_dict["local"]["posenet"](pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        print('Saving Result')
        img = utils.viz.cv_plot_keypoints(img,
                                          pred_coords,
                                          confidence,
                                          class_IDs,
                                          bounding_boxs,
                                          scores,
                                          box_thresh=bbox_thresh,
                                          keypoint_thresh=kp_thresh)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        cv2.imwrite(output_path, img)
        print('Done')

        result = {}
        result["pred_coords"] = pred_coords
        result["confidence"] = confidence
        result["class_IDs"] = class_IDs
        result["bounding_boxs"] = bounding_boxs
        result["scores"] = scores

        return result
示例#16
0
def detector_kp(image, bbox):

    class_IDs = mx.nd.array([[[0.]]], mx.gpu())
    scores = mx.nd.array([[[1.0]]], mx.gpu())
    bounding_boxs = mx.nd.array([[bbox]], mx.gpu())

    pose_input, upscale_bbox = detector_to_simple_pose(image, class_IDs,
                                                       scores, bounding_boxs)

    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    print('coord', pred_coords.asnumpy().shape)
    print('confidence', confidence.asnumpy().shape)

    kps = np.concatenate([pred_coords.asnumpy(), confidence.asnumpy()], axis=2)

    return True, np.squeeze(kps, axis=0)
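A sketch of calling detector_kp with a hand-made box, assuming bbox is [xmin, ymin, xmax, ymax] in pixels of the input image (the corner format GluonCV detectors emit); 'person.jpg' is a placeholder and, as in the function above, a GPU context is required:

import mxnet as mx

image = mx.image.imread('person.jpg')
bbox = [100.0, 50.0, 300.0, 450.0]  # assumed [xmin, ymin, xmax, ymax]
ok, kps = detector_kp(image, bbox)
print(kps.shape)  # (17, 3): x, y, confidence for each COCO joint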
Example #17
    def detect_main_point(self, capture):
        #axes = None
        #num_frames = len(F)
        #for i in range(num_frames):
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            #frame = F[i]
            frame = mx.nd.array(cv2.cvtColor(
                frame, cv2.COLOR_BGR2RGB)).astype('uint8')
            x, frame = gcv.data.transforms.presets.ssd.transform_test(
                frame, short=512, max_size=350)
            x = x.as_in_context(self.ctx)
            class_IDs, scores, bounding_boxs = self.detector(x)

            pose_input, upscale_bbox = detector_to_simple_pose(
                frame,
                class_IDs,
                scores,
                bounding_boxs,
                output_shape=(128, 96),
                ctx=self.ctx)
            if len(upscale_bbox) > 0:
                predicted_heatmap = self.estimator(pose_input)
                pred_coords, confidence = heatmap_to_coord(
                    predicted_heatmap, upscale_bbox)

                #img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                #                        box_thresh=0.5, keypoint_thresh=0.2)
                # mark important point of body

                # img = cv2.circle(img, (x, y), 4, (0, 255, 0), -1)
                # mark knees and shoulders

                # img = cv2.circle(img, kn, 4, (0, 255, 0), -1)
                # img = cv2.circle(img, shl, 4, (0, 255, 0), -1)
                _, plv = pred_coords.asnumpy()[:, 11][0]
                _, shl = pred_coords.asnumpy()[:, 5][0]
                _, kn = pred_coords.asnumpy()[:, 14][0]
                self.trajectory['palvic'].append(plv)
                self.trajectory['knee'].append(kn)
                self.trajectory['shoulders'].append(shl)
Example #18
def detect_2d_joints(frame, cur_frame, short=360):
    """
    Args:
        cur_frame: 当前帧
        short: 较短边resize大小
        frame: 任意尺寸的RGB图像

    Returns: 处理过的图像(ndarray),关节点坐标(NDArray)以及置信度等显示2d姿势相关的要素
    """
    global pre_frame, gap, csb
    # print("current frame: {}".format(cur_frame))
    # Resize the image and build the detector input tensor
    frame = nd.array(frame)
    x, img = data.transforms.presets.yolo.transform_test(frame, short=short)
    # print(x.shape, img.shape)
    # Detect persons (re-run the detector only every gap-th frame, otherwise reuse the cached result)
    if csb is None or cur_frame - pre_frame == gap:
        class_ids, scores, bounding_boxes = detector(x)
        pre_frame = cur_frame
        csb = (class_ids, scores, bounding_boxes)
    else:
        class_ids, scores, bounding_boxes = csb

    # Build the input tensor for the pose network
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_ids, scores,
                                                       bounding_boxes)
    pose_input, upscale_bbox = pose_input[:1], upscale_bbox[:1]
    # Predict joint keypoints
    predict_heatmap = pose_net(pose_input)
    predict_coords, confidence = heatmap_to_coord(predict_heatmap,
                                                  upscale_bbox)

    # Display the 2D pose
    # ax = utils.viz.plot_keypoints(img, predict_coords, confidence, class_ids, bounding_boxes, scores)

    return {
        'img': img,
        'coords': predict_coords,
        'confidence': confidence,
        'class_ids': class_ids,
        'bboxes': bounding_boxes,
        'scores': scores
    }
Example #19
def get_skeleton_from_frame(a_frame):
    ok_flag = False
    extra_person_flag = False
    x, frame = transform_test(a_frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs)
    b_coords = 0
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        pred_coords = pred_coords.asnumpy()
        b_coords = pred_coords[0]
        if pred_coords.shape[0] > 2:
            extra_person_flag = True
        if pred_coords.shape[0] == 2:
            # best guess when there are two boxes (subject and background look similar)
            if upscale_bbox[0][3] == 512:
                b_coords = pred_coords[1]
        ok_flag = True
    return ok_flag, extra_person_flag, b_coords
Example #20
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs,
                                                       output_shape=(128, 96), ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)
        plt.draw()
        plt.pause(0.001)

    return axes
Example #21
    def extract_pose_from_image(self, img, values):
        class_IDs, scores, bounding_boxs = self.detector(values)

        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = self.pose_net(pose_input)
        # left hand 5 7 9
        # right hand 6 8 10
        # left leg 11 13 15
        # right leg 12 14 16
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        ax = utils.viz.plot_keypoints(img,
                                      pred_coords,
                                      confidence,
                                      class_IDs,
                                      bounding_boxs,
                                      scores,
                                      box_thresh=0.5,
                                      keypoint_thresh=0.2)

        return pred_coords[0]
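For reference, the joint indices in the comments above follow the standard COCO-17 keypoint order:

COCO_KEYPOINTS = [
    'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
    'left_knee', 'right_knee', 'left_ankle', 'right_ankle',
]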
Example #22
    def keypoint_detection(self, frame):
        img = mx.nd.array(cv2.cvtColor(frame,
                                       cv2.COLOR_BGR2RGB)).astype('uint8')
        x, scaled_img = gcv.data.transforms.presets.yolo.transform_test(
            img, short=480, max_size=1024)
        x = x.as_in_context(self.ctx)
        class_IDs, scores, bounding_boxs = self.person_detector(x)
        pred_coords = np.zeros(1)
        pose_input, upscale_bbox = detector_to_simple_pose(scaled_img,
                                                           class_IDs,
                                                           scores,
                                                           bounding_boxs,
                                                           output_shape=(128,
                                                                         96),
                                                           ctx=self.ctx)
        if len(upscale_bbox) > 0:
            predicted_heatmap = self.pose_estimator(pose_input)
            pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                       upscale_bbox)
            scale = 1.0 * img.shape[0] / scaled_img.shape[0]
            img = cv_plot_keypoints(frame,
                                    pred_coords,
                                    confidence,
                                    class_IDs,
                                    bounding_boxs,
                                    scores,
                                    box_thresh=1,
                                    keypoint_thresh=0.3,
                                    scale=scale)
            pred_coords *= scale

        if isinstance(img, mx.nd.NDArray):
            img = frame
        if isinstance(pred_coords, mx.nd.NDArray):
            pred_coords = pred_coords.asnumpy()
        return pred_coords, img
Example #23
def predict(img_path):
    # 1. Pre-process the input image and detect persons
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    # detector.summary(x)
    # print("x.shape:", x.shape)

    start = time.time()

    # detect persons and bounding boxes
    class_ids, scores, bounding_boxes = detector(x)  # shape: [sample_idx, class_idx, instance]
    # print("bounding_boxes.shape", bounding_boxes.shape, "bounding_boxes[0, 0]:", bounding_boxes[0, 0])

    # 2. Pre-process the detector outputs as input for mobile_pose
    pose_input, upscale_bbox = detector_to_mobile_pose(img, class_ids, scores, bounding_boxes)
    print("detector cost time: {:.3f} seconds".format(time.time() - start))
    global detector_time
    detector_time += (time.time() - start)

    if pose_input is None:
        return None, None
    # 3. Predict 2D keypoints
    # pose_net.summary(pose_input)
    start_time = time.time()
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    # print("type(pre_coords): {}, shape(pre_coords): {}".format(type(pred_coords), pred_coords.shape))
    # print("pred_coords: {}".format(pred_coords))
    global predictor_2d_time
    predictor_2d_time += (time.time() - start_time)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))

    # 4. Display the 2D pose
    # ax = utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes, scores, box_thresh=0.5,
    #                               keypoint_thresh=0.2)
    # print(pred_coords)
    # 5. Normalize the coordinates
    start_time = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(), w=img.shape[1], h=img.shape[0])
    # print('kps.type: {}, kps.shape: {}'.format(type(kps), kps.shape))

    # 6. Build the 2D-keypoint generator
    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # Create the generator as input for the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad, causal_shift=causal_shift, augment=False)

    # 7. 3D pose estimation and display
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global predictor_3d_time, full_time
    predictor_3d_time += (time.time() - start_time)
    full_time += (time.time() - start)
    print("3d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))
    # print("prediction.shape: ", prediction.shape)

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    elapsed = time.time() - start
    print("Total elapsed time of predicting image file {}: {:.3f} seconds".format(img_path, elapsed))
    return prediction, img
Example #24
pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores,
                                                   bounding_boxs)

######################################################################
# Predict with a Simple Pose network
# --------------------
#
# Now we can make a prediction.
#
# A Simple Pose network predicts the heatmap for each joint (i.e. keypoint).
# After the inference we search for the highest value in the heatmap and map it to the
# coordinates on the original image.

predicted_heatmap = pose_net(pose_input)
pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
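
######################################################################
# The block above describes heatmap_to_coord as an argmax over each
# joint's heatmap, mapped back into the person box. Below is a minimal
# NumPy sketch of that idea (not the library implementation), assuming
# heatmaps of shape (N, K, H, W) and boxes of (N, 4) pixel corners.

import numpy

def heatmap_argmax_sketch(heatmaps, boxes):
    N, K, H, W = heatmaps.shape
    flat = heatmaps.reshape(N, K, -1)
    idx = flat.argmax(axis=2)          # highest-response cell per joint
    ys, xs = idx // W, idx % W
    conf = flat.max(axis=2)
    coords = numpy.zeros((N, K, 2))
    for i, (x0, y0, x1, y1) in enumerate(boxes):
        # map grid cells back to pixel coordinates inside each box
        coords[i, :, 0] = x0 + (xs[i] + 0.5) / W * (x1 - x0)
        coords[i, :, 1] = y0 + (ys[i] + 0.5) / H * (y1 - y0)
    return coords, conf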


def dist(np1, np2):
    # np1 and np2 are MXNet NDArrays, so the norm is a 1-element NDArray;
    # the chained item()/asnumpy()/item() calls reduce it to a plain Python float.
    # See https://gluon.mxnet.io/chapter01_crashcourse/ndarray.html#Slicing
    return np.linalg.norm(np1 - np2).item().asnumpy().item()


for person in pred_coords:
    print(person)
Example #25
#
# In order to make sure the bounding box includes the entire person,
# we usually slightly upscale the box size.

pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)

######################################################################
# Predict with a Simple Pose network
# --------------------
#
# Now we can make a prediction.
#
# A Simple Pose network predicts the heatmap for each joint (i.e. keypoint).
# After the inference we search for the highest value in the heatmap and map it to the
# coordinates on the original image.

predicted_heatmap = pose_net(pose_input)
pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

######################################################################
# Display the pose estimation results
# ---------------------
#
# We can use :py:func:`gluoncv.utils.viz.plot_keypoints` to visualize the
# results.

ax = utils.viz.plot_keypoints(img, pred_coords, confidence,
                              class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2)
plt.show()
Example #26
def main():
    start = time.time()
    count = 0
    pose = None
    skip_frame = False  # to increase frame rate, I only process every other frame.

    #with open('data.txt', 'a') as outfile:

    while True:  # While there is video frames to process...

        frame = vs.read()

        if using_vid_file:
            # with a video file, frame is a tuple: (bool, array of pixels)
            exists, frame = frame
            if not exists:
                break  # exit when there are no incoming frames
        try:
            frame = np.fliplr(frame)  # display the mirror image of the input
        except ValueError:
            print("[ERROR] video file not found; include the path and "
                  "extension, e.g. './vid.mp4'")
            break
        count += 1
        frame = imutils.resize(frame, width=280)
        frame = mx.nd.array(cv2.cvtColor(frame,
                                         cv2.COLOR_BGR2RGB)).astype('uint8')
        if not skip_frame:
            x, frame = gcv.data.transforms.presets.ssd.transform_test(
                frame, short=512, max_size=280)
            x = x.as_in_context(ctx)

            class_IDs, scores, bounding_boxs = detector(x)

            pose_input, upscale_bbox = detector_to_simple_pose(
                frame,
                class_IDs,
                scores,
                bounding_boxs,
                output_shape=(128, 96),
                ctx=ctx)
            if len(upscale_bbox) > 0:
                predicted_heatmap = estimator(pose_input)
                pred_coords, confidence = heatmap_to_coord(
                    predicted_heatmap, upscale_bbox)
                img, pose = cv_plot_keypoints(frame,
                                              pred_coords,
                                              confidence,
                                              class_IDs,
                                              None,
                                              scores,
                                              box_thresh=0.5,
                                              keypoint_thresh=0.15)

                #The following lines were for saving vid info to a file
                #pose = args['vid'].split('/')
                #pose = pose[2][:-4]
                #outfile.write(pose + ', ')
                #for angle in angles:
                #    outfile.write(str(angle) + ', ')
                #outfile.write('\n')

                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            else:
                img = frame  # keep `img` defined when no person is detected
            skip_frame = True

        else:
            skip_frame = False
        img = imutils.resize(img, height=280,
                             width=500)  # blow the image up for display

        if pose:
            cv2.putText(img, '{}'.format(pose), (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        else:
            cv2.putText(img, 'No Pose Detected', (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
        if using_vid_file:
            vid_writer.write(img)
        cv2.imshow('Webcam', img)
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break

    cv2.destroyAllWindows()
    if not using_vid_file:
        vs.stop()
    if using_vid_file:
        vid_writer.release()
    stop = time.time()
    #outfile.close()
    print("fps: {}".format(count / (stop - start)))