Example #1
    def get_pose(self):
        if self.image is None:
            return {"pose2D": [], "pose3D": []}

        input_scale = PoseService.base_height / self.image.shape[0]
        scaled_img = cv2.resize(self.image,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        # Better to pad, but cropped for the demo.
        scaled_img = scaled_img[:, :scaled_img.shape[1] -
                                (scaled_img.shape[1] % PoseService.stride)]

        PoseService.fx = np.float32(0.8 * self.image.shape[1])

        inference_result = PoseService.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale,
                                         PoseService.stride, PoseService.fx,
                                         False)

        if len(poses_3d):
            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]

        # draw_poses(self.image, poses_2d)

        # cv2.imwrite('2d.png', self.image)

        if poses_3d.size == 0 or poses_2d.size == 0:
            return {"pose2D": [], "pose3D": []}

        return {
            "pose2D": poses_2d[0].flatten().tolist(),
            "pose3D": poses_3d[0].flatten().tolist()
        }
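
The crop above trims columns so the width is a multiple of the stride; the comment notes that padding would be better. A minimal sketch of that alternative, as a hypothetical helper (not part of any snippet on this page):

import cv2

def pad_width_to_stride(img, stride):
    # Zero-pad the right edge so that img.shape[1] % stride == 0,
    # keeping all pixels instead of cropping them away.
    pad_w = (-img.shape[1]) % stride
    return cv2.copyMakeBorder(img, 0, 0, 0, pad_w,
                              cv2.BORDER_CONSTANT, value=0)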
Example #2
def calcPoses(image, input_scale, fx):
    stride = 8
    inference_result = net.infer(image)
    poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx,
                                     True)

    return poses_3d, poses_2d
Example #3
    def eval(self, frame):

        input_scale = self.model_input_height / frame.shape[0]
        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        # Inference
        inference_result = self.inference_engine.infer(scaled_img)
        # Postprocessing (grouping)
        poses_2d = parse_poses(inference_result, input_scale, self.stride,
                               self.upsample_ratio)  #, threshold)

        return poses_2d
Example #4
    def NextFrame(self, event):
        ret, rgb = self.capture.read()
        self.counter_label.SetLabel("Times: " + str(self.count))

        if ret:
            input_scale = self.base_height / rgb.shape[0]
            scaled_img = cv2.resize(rgb, dsize=None, fx=input_scale, fy=input_scale)
            inference_result = self.inference_engine.infer(scaled_img)
            poses_2d = parse_poses(inference_result, input_scale, 8, -1, True)
            draw_poses(rgb, poses_2d)
            if self.motion=="jumpingjack":
                self.up,self.count = count_jumpup(poses_2d,self.up,self.count)
            elif self.motion=="situp":
                self.up,self.count = count_situp(poses_2d,self.up,self.count)
            elif self.motion=="squat":
                self.up,self.count = count_squat(poses_2d,self.up,self.count)
            rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
            self.bmp.CopyFromBuffer(rgb)
            self.Refresh()
        else:
            self.timer.Stop()
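
count_jumpup, count_situp and count_squat are not shown in this example. A plausible sketch of how such a counter could work, assuming the flat (x, y, confidence) keypoint layout used elsewhere on this page (the hip/knee indices and the margin are illustrative assumptions):

import numpy as np

def count_squat(poses_2d, up, count, margin=20):
    # A squat reads as "down" when the hip keypoint sinks close to the knee
    # keypoint (image y grows downward); count one rep per full down-up cycle.
    if len(poses_2d) == 0:
        return up, count
    kps = np.array(poses_2d[0][0:-1]).reshape((-1, 3))  # rows of (x, y, conf)
    hip_y, knee_y = kps[6][1], kps[7][1]  # assumed hip/knee indices
    if up and hip_y > knee_y - margin:        # person went down
        up = False
    elif not up and hip_y < knee_y - margin:  # back up again: one rep
        up = True
        count += 1
    return up, count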
Example #5
    def eval(self, frame):
        self.frame = frame

        base_height = 256
        scale = base_height / self.frame.shape[0]
        scaled_img = cv2.resize(self.frame, dsize=None, fx=scale, fy=scale)
        inference_result = self.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, scale, 8, 1)

        if self.draw_render:
            draw_poses(self.frame, poses_2d)

        # When no person is detected, shape = (); otherwise (nb_persons, 25, 3).
        if poses_2d.shape[0] != 0:
            self.body_kps = np.array([
                np.array(poses_2d[pose_id][0:-1]).reshape((-1, 3))
                for pose_id in range(len(poses_2d))
            ])

            # Sort persons by an "estimation" of their size. This size has little
            # to do with a person's real size; it is an arbitrary value computed
            # here as distance(Nose, Neck) + 0.33 * distance(Neck, MidHip).
            sizes = np.array([
                self.length(pairs_spine, person_idx=i, coefs=[1, 0.33])
                for i in range(self.body_kps.shape[0])
            ])

            # Sort from biggest size to smallest
            order = np.argsort(-sizes)
            sizes = sizes[order]
            self.body_kps = self.body_kps[order]

            # Keep only the biggest person
            self.body_kps = self.body_kps[0]

            self.nb_persons = 1
        else:
            self.nb_persons = 0
            self.body_kps = []

        return self.nb_persons, self.body_kps
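
self.length is not shown here; judging from the comment, it plausibly sums weighted segment lengths over the index pairs in pairs_spine, e.g. (Nose, Neck) and (Neck, MidHip). A standalone sketch under that assumption (the helper name is hypothetical):

import numpy as np

def weighted_length(kps, pairs, coefs):
    # kps: (num_keypoints, 3) rows of (x, y, confidence);
    # pairs: keypoint index pairs, e.g. [(nose, neck), (neck, midhip)];
    # returns sum(coef * euclidean_distance(pair)).
    return sum(c * np.linalg.norm(kps[a][:2] - kps[b][:2])
               for (a, b), c in zip(pairs, coefs))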
Example #6
    video_writer = cv2.VideoWriter()
    if not video_writer.open(
            args.output, cv2.VideoWriter_fourcc(*'MJPG'), cap.fps(),
            (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        current_time = cv2.getTickCount()
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = inference_engine.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride,
                                         fx, is_video)
        edges = []
        if len(poses_3d) > 0:
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES +
                     19 * np.arange(poses_3d.shape[0]).reshape(
                         (-1, 1, 1))).reshape((-1, 2))
        plotter.plot(canvas_3d, poses_3d, edges)
Example #7
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    check_file_existance(FILE_PATH)

    # prepare input data
    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    with open(FILE_PATH, 'r') as f:
        extrinsics = json.load(f)

    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    if args.video is None:
        frame_provider = ImageReader([args.input])
        is_video = False
    else:
        frame_provider = VideoReader(args.video)
        is_video = True

    fx = -1
    delay = 1
    esc_code = 27
    p_code = 112
    space_code = 32
    mean_time = 0
    img_mean = np.array([128, 128, 128], dtype=np.float32)
    base_width_calculated = False

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    for frame_id, frame in enumerate(frame_provider):
        current_time = cv2.getTickCount()
        if frame is None:
            break

        if not base_width_calculated:
            IMAGE_WIDTH = frame.shape[1] * (IMAGE_HEIGHT / frame.shape[0])
            IMAGE_WIDTH = int(IMAGE_WIDTH / STRIDE) * STRIDE
            net.set_input_shape((1, 3, IMAGE_HEIGHT, IMAGE_WIDTH))
            base_width_calculated = True

        input_scale = IMAGE_HEIGHT / frame.shape[0]
        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        # better to pad, but cut out for demo
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] -
                                (scaled_img.shape[1] % STRIDE)]

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        normalized_img = (scaled_img.astype(np.float32) - img_mean) / 255.0
        normalized_img = np.expand_dims(normalized_img.transpose(2, 0, 1),
                                        axis=0)

        # execution
        if is_video:
            input_blobs = net.get_input_blob_list()
            net.set_input_blob_data(normalized_img, input_blobs[0])
            net.update()
            features, heatmaps, pafs = net.get_results()

        else:
            print('Start inference...')
            if args.benchmark:
                print('BENCHMARK mode')
                for i in range(5):
                    start = int(round(time.time() * 1000))
                    features, heatmaps, pafs = net.predict([normalized_img])
                    end = int(round(time.time() * 1000))
                    print(f'\tailia processing time {end - start} ms')
            else:
                features, heatmaps, pafs = net.predict([normalized_img])

        inference_result = (features[-1].squeeze(), heatmaps[-1].squeeze(),
                            pafs[-1].squeeze())

        poses_3d, poses_2d = parse_poses(inference_result, input_scale, STRIDE,
                                         fx, is_video)
        edges = []
        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES +
                     19 * np.arange(poses_3d.shape[0]).reshape(
                         (-1, 1, 1))).reshape((-1, 2))
        plotter.plot(canvas_3d, poses_3d, edges)

        if is_video:
            cv2.imshow(canvas_3d_window_name, canvas_3d)
        else:
            cv2.imwrite(f'Canvas3D_{frame_id}.png', canvas_3d)

        draw_poses(frame, poses_2d)
        current_time = (cv2.getTickCount() -
                        current_time) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                    (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))

        if is_video:
            cv2.imshow('ICV 3D Human Pose Estimation', frame)
        else:
            cv2.imwrite(args.savepath, frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            if delay == 1:
                delay = 0
            else:
                delay = 1

        if delay == 0 and args.rotate3d:
            key = 0
            while (key != p_code and key != esc_code and key != space_code):
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            else:
                delay = 1

    print('Script finished successfully.')
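
The strided slices in the loop above treat each pose as a flat sequence of (x, y, z, confidence) groups, so [:, 0::4] selects every x and the tuple assignment swaps camera axes into plot axes. A tiny standalone check of the remap with made-up values:

import numpy as np

pose = np.array([[1.0, 2.0, 3.0, 0.9]], dtype=np.float32)  # one joint: x, y, z, conf
x, y, z = pose[:, 0::4].copy(), pose[:, 1::4].copy(), pose[:, 2::4].copy()
pose[:, 0::4], pose[:, 1::4], pose[:, 2::4] = -z, x, -y
print(pose)  # [[-3.  1. -2.  0.9]]: camera (x, y, z) becomes plot (-z, x, -y)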
Example #8
        def updatefig(i, noPlot=False):
            print("updatefig", i, label)
            # ax0.cla()
            # ax1.cla()
            # ax2.cla()

            imageIdx = i + startFrom
            print("imageIdx", imageIdx)

            if not noPlot and imageIdx == vidLength:
                print(
                    f'imageIdx {imageIdx} == vidLength {vidLength}; closing!')
                plt.close(fig)
            try:
                frame = vid.get_data(imageIdx)
                input_scale = base_height / frame.shape[0]
                fx = np.float32(0.8 * frame.shape[1])
                scaled_img = cv2.resize(frame,
                                        dsize=None,
                                        fx=input_scale,
                                        fy=input_scale)
                # Better to pad, but cropped for the demo.
                scaled_img = scaled_img[:, :scaled_img.shape[1] -
                                        (scaled_img.shape[1] % stride)]

                inference_result = net.infer(scaled_img)
                poses_3dFromImage, poses_2d = parse_poses(
                    inference_result, input_scale, stride, fx, is_video)
            except Exception:  # frame read or inference failed
                poses_3dFromImage = []
                poses_2d = []

            if (len(poses_3dFromImage) == 0 or len(poses_2d) == 0
                    or poses_3dFromImage.all() == nullPose3D.all()):
                print("No pose detected")
                # return False if noPlot==True else ax0,ax1,ax2
                # return False
                poses_3dFromImage = np.array([np.zeros((19, 3))])
            else:
                if True:
                    poses_3dFromImage = rotate_poses(poses_3dFromImage, R, t)
                    poses_3dFromImage = reshape_poses(poses_3dFromImage)
                else:
                    poses_3dFromImage = stand3dmatrix

            if not noPlot:
                edgesFromImage = (
                    Plotter3d.SKELETON_EDGES +
                    19 * np.arange(poses_3dFromImage.shape[0]).reshape(
                        (-1, 1, 1))).reshape((-1, 2))
                canvas_3d = np.zeros((450, 450, 3), dtype=np.uint8)
                plotter = Plotter3d(canvas_3d.shape[:2])
                plotter.plot(canvas_3d, poses_3dFromImage, edgesFromImage)
                ax0.imshow(canvas_3d)
                draw_poses(frame, poses_2d)
                ax1.imshow(frame)

            # Setting the values for all axes.
            csiIndices, parsedTimeInVid = imageIdx2csiIndicesPrecise(
                duration_in_sec, imageIdx, tsList, vidLength, lastsec)

            if noPlot:
                print("parsedTimeInVid", parsedTimeInVid)
                parsedPoses_3dFromImage = np.array(
                    poses_3dFromImage[0]).reshape(3 * 19)
                parsedTimeInVid_array = np.array([parsedTimeInVid])
                pose3D_value.append(
                    np.concatenate(
                        (parsedTimeInVid_array, parsedPoses_3dFromImage)))
                if (len(csiIndices) > 0):
                    startCSIIdx = csiIndices[0]
                    endCSIIdx = csiIndices[len(csiIndices) - 1]
                    print(startCSIIdx, '-', endCSIIdx)
                    print(endCSIIdx - startCSIIdx + 1)
                    for k in csiIndices:
                        curParseCSI = parseCSI(csiList[k])
                        print("adding ", curParseCSI)
                        if curParseCSI is not False:
                            print("len check")
                            print(k, len(curParseCSI), tsList[k])
                            if (len(curParseCSI) != 384):
                                print("len not 384")
                                continue
                            print("isFloat check")
                            isInt = True
                            for l in range(384):
                                if (isinstance(curParseCSI[l], int) == False):
                                    print(curParseCSI[l], " is not int")
                                    isInt = False
                                    break
                            if isInt == False:
                                continue
                            csi_value.append([tsList[k]] + curParseCSI)
                            print("added ", k)
                        else:
                            csi_value.append([tsList[k]] +
                                             [0 for l in range(384)])
                            print("added ", k, 'as 0s')
            else:
                for j in range(0, 64):
                    if (6 <= j < 32 or 33 <= j < 59):
                        textX = []
                        textY = []
                        for k in csiIndices:
                            textX.append(tsList[k] / (10**6))
                            textY.append(rawCSItoAmp(parseCSI(x), 128)[k][j])
                        ax2.plot(textX,
                                 gaussian_filter(textY, sigma=1),
                                 label='CSI subcarrier')
                print("added")

            # print(tsList[csiIdx])
            return False  #if noPlot==True else ax0,ax1,ax2
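
The CSI bookkeeping above boils down to three checks per row: parseCSI succeeded, the row has 384 entries, and every entry is an int. A compact equivalent of that validation (a hypothetical helper, same logic):

def valid_csi_row(row, expected_len=384):
    return (row is not False
            and len(row) == expected_len
            and all(isinstance(v, int) for v in row))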
Example #9
def pose3d():
    stride = 8
    #f.write("check8");
    net = InferenceEnginePyTorch(
        os.path.join(PyPATH, 'model', 'human-pose-estimation-3d.pth'), 'GPU')

    with open(os.path.join(PyPATH, 'parameters', 'extrinsics.json'), 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    cap = cv2.VideoCapture(1)

    if not cap.isOpened():
        print("Webcam not recognized")

    base_height = 256

    while True:
        ret, frame = cap.read()
        if not ret:
            continue
        #time.sleep(1)
        #print(ret)
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        #scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
        fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride,
                                         fx)
        if len(poses_2d):
            poses_2d_copy = poses_2d.copy()
            x = poses_2d_copy[:, 0::3]
            y = poses_2d_copy[:, 1::3]
            #0 - chest
            #1 - nose
            #2 - nothing
            #3 - left shoulder
            #4 - left elbow
            #5 - left wrist
            #6 - left hip
            #7 - left knee
            #8 - left foot
            #9 - right shoulder
            #10 - right elbow
            #11 - right wrist
            #12 - right hip
            #13 - right knee
            #14 - right foot
            frame = cv2.circle(frame, (int(x[0][7]), int(y[0][7])), 10, (255, 0, 0))
            print("x")
            print(x[0][7])
            print("y")
            print(y[0][7])
            #frame
            #size = len(x[0])
            #i = 0
            #for a in x[0]:
            #frame = cv2.circle(frame, (x[0][i], y[0][i]), 10,(255, 0, 0))
            #i=i+1
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            #print(poses_3d_copy)
            #print("x")
            #print(x)
            #print("y")
            #print(y)
        if len(poses_3d):
            poses_3d_copy = poses_3d.copy()
            #poses_3d_1 = poses_3d
            #poses_3d_2 = poses_3d
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
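
The index comments above translate naturally into a lookup table; with one in place, the single hard-coded circle for keypoint 7 generalizes to all named joints. The table below is an assumption transcribed from those comments, and the loop reuses the snippet's frame, x and y:

KEYPOINT_NAMES = {
    0: 'chest', 1: 'nose',
    3: 'left shoulder', 4: 'left elbow', 5: 'left wrist',
    6: 'left hip', 7: 'left knee', 8: 'left foot',
    9: 'right shoulder', 10: 'right elbow', 11: 'right wrist',
    12: 'right hip', 13: 'right knee', 14: 'right foot',
}

# inside the `if len(poses_2d):` block above:
for idx in KEYPOINT_NAMES:
    frame = cv2.circle(frame, (int(x[0][idx]), int(y[0][idx])), 10, (255, 0, 0))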
Example #10
    async def estimate(websocket, path):
        name = await websocket.recv()
        print(f"< {name}")
        global fx
        delay = 1
        esc_code = 27
        p_code = 112
        space_code = 32
        mean_time = 0

        for frame in frame_provider:
            current_time = cv2.getTickCount()
            if frame is None:
                break
            input_scale = base_height / frame.shape[0]
            scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
            scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
            if fx < 0:  # Focal length is unknown
                fx = np.float32(0.8 * frame.shape[1])

            inference_result = net.infer(scaled_img)
            poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
            edges = []
            if len(poses_3d):
                poses_3d = rotate_poses(poses_3d, R, t)
                poses_3d_copy = poses_3d.copy()
                x = poses_3d_copy[:, 0::4]
                y = poses_3d_copy[:, 1::4]
                z = poses_3d_copy[:, 2::4]
                poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

                poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
                edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))
            plotter.plot(canvas_3d, poses_3d, edges)
            cv2.imshow(canvas_3d_window_name, canvas_3d)

            draw_poses(frame, poses_2d)

            current_time = (cv2.getTickCount() - current_time) / cv2.getTickFrequency()
            if mean_time == 0:
                mean_time = current_time
            else:
                mean_time = mean_time * 0.95 + current_time * 0.05
            cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                        (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
            cv2.imshow('ICV 3D Human Pose Estimation', frame)

            greeting = f"{json.dumps(poses_3d, cls=NumpyEncoder)}"
            await websocket.send(greeting)
            print(f"> {greeting}")

            key = cv2.waitKey(delay)
            if key == esc_code:
                break
            if key == p_code:
                if delay == 1:
                    delay = 0
                else:
                    delay = 1
            if delay == 0 or not is_video:  # allow rotating the 3D canvas while paused
                key = 0
                while (key != p_code
                    and key != esc_code
                    and key != space_code):
                    plotter.plot(canvas_3d, poses_3d, edges)
                    cv2.imshow(canvas_3d_window_name, canvas_3d)
                    key = cv2.waitKey(33)
                if key == esc_code:
                    break
                else:
                    delay = 1
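
NumpyEncoder is referenced but not defined in this snippet. A common minimal implementation (an assumption about what the author used) converts NumPy arrays and scalars into JSON-serializable Python types so that json.dumps(poses_3d, cls=NumpyEncoder) works:

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()   # arrays become nested lists
        if isinstance(obj, np.generic):
            return obj.item()     # NumPy scalars become Python scalars
        return super().default(obj)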
Example #11
def run_inference(args):
    from modules.inference_engine_pytorch import InferenceEnginePyTorch

    socket_server = SocketServer(args.port)
    joint_angle_calculator = JointAngleCalculator()

    stride = 8

    model_path = os.path.join('models', 'human-pose-estimation-3d.pth')
    net = InferenceEnginePyTorch(model_path, "GPU")

    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas 3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    file_path = os.path.join('data', 'extrinsics.json')
    with open(file_path, 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    frame_provider = ImageReader(args.images)
    is_video = False
    if args.video != '':
        frame_provider = VideoReader(args.video)
        is_video = True
    base_height = args.height_size
    fx = 1  # focal length (the auto-estimate below only runs when fx < 0)

    delay = 1
    esc_code = 27
    p_code = 112
    space_code = 32
    mean_time = 0

    for frame in frame_provider:
        current_time = cv2.getTickCount()
        if frame is None:
            break
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
        edges = []

        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))

        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)

        draw_poses(frame, poses_2d)
        current_time = (cv2.getTickCount() - current_time) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                    (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            if delay == 1:
                delay = 0
            else:
                delay = 1
        if delay == 0 or not is_video:  # allow rotating the 3D canvas while paused
            key = 0
            while (key != p_code
                   and key != esc_code
                   and key != space_code):
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            else:
                delay = 1
        
        joint_angles = joint_angle_calculator.calculate_angles(poses_3d)
        if joint_angles:
            socket_server.send_data(joint_angles)
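
JointAngleCalculator is not shown; its core computation is presumably the angle at a joint formed by three 3D keypoints. A minimal sketch (the function name and the example indices are assumptions):

import numpy as np

def joint_angle(a, b, c):
    # Angle at keypoint b, in degrees, between segments b->a and b->c.
    v1, v2 = np.asarray(a) - np.asarray(b), np.asarray(c) - np.asarray(b)
    cos_ang = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-9)
    return float(np.degrees(np.arccos(np.clip(cos_ang, -1.0, 1.0))))

# e.g. an elbow angle from one (19, 3) pose:
# joint_angle(pose[shoulder_idx], pose[elbow_idx], pose[wrist_idx])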
Example #12
    def infer(self, frame, is_video=True, fx=None):
        stride = 8
        output = {}
        if fx is None:
            fx = self.fx
            output["focal_length"] = {
                "value": fx,
                "comment": "default value used because none was supplied"
            }

        R = np.array(self.extrinsics['R'], dtype=np.float32)
        t = np.array(self.extrinsics['t'], dtype=np.float32)

        input_scale = self.base_height / frame.shape[0]

        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)

        scaled_img = scaled_img[:, 0:scaled_img.shape[1] -
                                (scaled_img.shape[1] % stride)]

        # scaled_img = pad_resize_image(scaled_img, (scaled_img.shape[0], (scaled_img.shape[1] + stride)//stride, scaled_img.shape[2]))

        output["input_size"] = {
            "value": scaled_img.shape,
            "comment": "network inpute size"
        }

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

            output["focal_length"] = {
                "value": fx,
                "comment": "Focal length is unknown, 0.8 * frame width used"
            }

        # the inference
        inference_result = self.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride,
                                         fx, is_video)
        edges = []

        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES +
                     19 * np.arange(poses_3d.shape[0]).reshape(
                         (-1, 1, 1))).reshape((-1, 2))

        output["pose_3d"] = {
            "value": poses_3d,
            "comment": "re-oriented 3D poses"
        }

        output["pose_2d"] = {"value": poses_2d, "comment": "2D poses"}

        output["edges"] = {"value": edges, "comment": "2D poses"}

        return output
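
A hypothetical driver for the infer() method above; the wrapper class name Pose3DEstimator and its constructor are assumptions, while the output keys match the method:

import cv2

estimator = Pose3DEstimator()               # assumed wrapper exposing infer()
frame = cv2.imread('person.jpg')
result = estimator.infer(frame, is_video=False, fx=-1)

print(result['focal_length']['comment'])    # reports which fx was used and why
poses_3d = result['pose_3d']['value']       # (num_people, 19, 3) when non-empty
edges = result['edges']['value']            # skeleton edge index pairs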