def test_model_accuracy(self):
    image = Image.open(test_image).convert('RGB')
    for model_path, model_name in test_utils.generate_models():
        print('Testing Accuracy for: ', model_path)
        engine = PoseEngine(model_path)
        model_pose_result, _ = engine.DetectPosesInImage(image)
        reference_pose_scores, reference_keypoints = test_utils.parse_reference_results(
            model_name)
        score_delta = 0.1  # Allows score to change within 1 decimal place.
        pixel_delta = 4.0  # Allows placement changes of 4 pixels.
        pose_idx = 0
        keypoint_idx = 0
        for model_pose in model_pose_result:
            self.assertAlmostEqual(model_pose.score,
                                   reference_pose_scores[pose_idx],
                                   delta=score_delta)
            for _, model_keypoint in model_pose.keypoints.items():
                reference_keypoint = reference_keypoints[keypoint_idx]
                self.assertAlmostEqual(model_keypoint.score,
                                       reference_keypoint.score,
                                       delta=score_delta)
                self.assertAlmostEqual(model_keypoint.point[0],
                                       reference_keypoint.point[0],
                                       delta=pixel_delta)
                self.assertAlmostEqual(model_keypoint.point[1],
                                       reference_keypoint.point[1],
                                       delta=pixel_delta)
                keypoint_idx += 1
            pose_idx += 1
Example #2
    def __init__(self):
        self.engine = PoseEngine(
            'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
        self.confidence = 3
        self.margin = 50
        self.image_shape = (640, 480)
        self.input_shape = (641, 481)
Example #3
    def __init__(self, config):
        self.config = config
        # Get the model name from the config
        self.model_name = self.config.DETECTOR_NAME
        # Frames Per Second
        self.fps = None 
        self.engine = PoseEngine(
            "/project-posenet/models/mobilenet/posenet_mobilenet_v1_075_721_1281_quant_decoder_edgetpu.tflite"
        )

        self.w, self.h = self.config.DETECTOR_INPUT_SIZE[0], self.config.DETECTOR_INPUT_SIZE[1]
        self.keypoints = (
            'nose',
            'left eye',
            'right eye',
            'left ear',
            'right ear',
            'left shoulder',
            'right shoulder',
            'left elbow',
            'right elbow',
            'left wrist',
            'right wrist',
            'left hip',
            'right hip',
            'left knee',
            'right knee',
            'left ankle',
            'right ankle',
        )
Example #4
def main():
    parser = argparse.ArgumentParser(description='PoseNet')
    parser.add_argument('--model', type=str, default='mobilenet')
    args = parser.parse_args()

    if args.model == 'mobilenet':
        model = 'models/mobilenet/posenet_mobilenet_v1_075_353_481_quant_decoder_edgetpu.tflite'
    else:
        model = 'models/resnet/posenet_resnet_50_416_288_16_quant_edgetpu_decoder.tflite'

    engine = PoseEngine(model)
    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[2], input_shape[1])
    print(inference_size)
    cap = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out_video = cv2.VideoWriter('./output.mp4', fourcc,
                                cap.get(cv2.CAP_PROP_FPS), inference_size)

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    if not cap.isOpened():
        print("Camera Open Error")
        sys.exit(0)

    count = 0
    total_fps = 0.0
    fps_cnt = 0
    while cap.isOpened() and count < 60:
        ret_val, img = cap.read()
        if not ret_val:
            print("Camera read Error")
            break
        print('frame read')
        img = cv2.resize(img, inference_size)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        s_time = time.time()
        poses, inference_time = engine.DetectPosesInImage(rgb)
        fps = 1.0 / (time.time() - s_time)
        total_fps += fps
        fps_cnt += 1
        for pose in poses:
            print('\nPose Score: %f  FPS:%f' % (pose.score, fps))
            if pose.score < 0.3: continue
            for label, keypoint in pose.keypoints.items():
                print(' %-20s x=%-4d y=%-4d score=%.1f' %
                      (label, keypoint.yx[1], keypoint.yx[0], keypoint.score))
                cv2.circle(img, (keypoint.yx[1], keypoint.yx[0]), 2,
                           (0, 255, 0), -1)

        out_video.write(img)
        count += 1
    if fps_cnt > 0:
        print('Model[%s] Avg FPS: %f' % (args.model, total_fps / fps_cnt))
    cv2.destroyAllWindows()
    cap.release()
    out_video.release()
Example #5
def posecamera(cap):
    
    # The path is a template; the (height, width) pair must match the model
    # variant (481x641 suits a 640x480 source, as in the other examples).
    model = ('models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
             % (481, 641))

    engine = PoseEngine(model)

    width, height = src_size

    isVideoFile = False
    frameCount = 0
    maxFrames = 0

    # VideoCapture init

    print("Open Video Source")
    cap.set(cv2.CAP_PROP_FPS, 60)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    try:
        pose_ary = {}
        flag = False
        while True:
            ret, frame = cap.read()
            if not ret:
                print("can't read video source")
                break

            # BGR -> RGB for the pose engine.
            rgb = frame[:, :, ::-1]

            outputs, inference_time = engine.DetectPosesInImage(rgb)

            # Crop the frame to the display size before drawing.
            imgDisp = frame[0:appsink_size[1], 0:appsink_size[0]].copy()

            if args.mirror:
                imgDisp = cv2.flip(imgDisp, 1)

            for pose in outputs:
                draw_pose(imgDisp, pose)

            cv2.imshow('PoseNet - OpenCV', imgDisp)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except Exception:
        raise
    finally:
        cv2.destroyAllWindows()
        cap.release()
Example #6
def run(inf_callback, render_callback):
    global t
    if t >= 3:
        return
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror',
                        help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res',
                        help='Resolution',
                        default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc',
                        help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264',
                        help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpeg',
                        help='Use image/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    default_model = 'models/mobilenet/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (480, 360)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model)
    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[2], input_shape[1])

    gstreamer.run_pipeline(
        partial(inf_callback, engine),
        partial(render_callback, engine),
        src_size,
        inference_size,
        mirror=True,  # hard-coded; args.mirror is ignored here
        videosrc=args.videosrc,
        h264=args.h264,
        jpeg=args.jpeg)
Example #7
    def __init__(self, model, anonymize=True, bodyparts=True, drawposes=True):
        self.anonymize = anonymize
        self.bodyparts = bodyparts
        self.drawposes = drawposes
        self.background_image = None
        self.last_time = time.monotonic()
        self.frames = 0
        self.sum_fps = 0
        self.sum_process_time = 0
        self.sum_inference_time = 0

        self.engine = PoseEngine(model)
        inference_size = (self.engine.image_width, self.engine.image_height)
        print('Inference size: {}'.format(inference_size))
Example #8
def main():

    synth = fluidsynth.Synth()

    synth.start('alsa')
    soundfont_id = synth.sfload('/usr/share/sounds/sf2/FluidR3_GM.sf2')
    for channel, instrument in enumerate(CHANNELS):
        synth.program_select(channel, soundfont_id, 0, instrument)

    # Open a test image to run through PoseNet.
    pil_image = Image.open('PATH/TO/TEST/IMAGE/GOES/HERE.jpg')

    pil_image = pil_image.resize((641, 481), Image.NEAREST)
    engine = PoseEngine(
        'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
    print('Inference time: %.fms' % inference_time)

    velocities = {}
    for pose in poses:
        left = pose.keypoints.get('left wrist')
        right = pose.keypoints.get('right wrist')
        if not (left and right): continue

        if pose.score < 0.4: continue
        print('\nPose Score: ', pose.score)
        for label, keypoint in pose.keypoints.items():
            print(' %-20s x=%-4d y=%-4d score=%.1f' %
                  (label, keypoint.yx[1], keypoint.yx[0], keypoint.score))

        identity = IDENTITIES[2 % len(IDENTITIES)]  # use the VOICE_OOHS identity
        left = 1 - left.yx[0] / engine.image_height
        right = 1 - right.yx[0] / engine.image_height
        velocity = int(left * 165)
        i = int(right * identity.extent)
        note = (identity.base_note + OCTAVE * (i // len(SCALE)) +
                SCALE[i % len(SCALE)])
        velocities[(identity.channel, note)] = velocity
        # With video or a live feed, prev_notes would hold the notes from the
        # previous frame so they could be turned off:
        #     for note in prev_notes:
        #         if note not in velocities: synth.noteoff(*note)
        # Since only a single frame is processed here, the sleep below simply
        # lets each note ring.
        for note, velocity in velocities.items():
            synth.noteon(*note, velocity)
            time.sleep(1)
Example #9
def run(callback, use_appsrc=False):
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally', action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res', help='Resolution', default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc', help='Which video source to use', default='/dev/video1')
    parser.add_argument('--h264', help='Use video/x-h264 input', action='store_true')
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(partial(callback, engine),
                           src_size, appsink_size,
                           use_appsrc=use_appsrc, mirror=args.mirror,
                           videosrc=args.videosrc, h264input=args.h264)
Example #10
File: PN.py Project: zuypt/thesis
class PoseNet:
    def __init__(
        self,
        model='../models/posenet_mobilenet_v1_075_353_481_quant_decoder_edgetpu.tflite'
    ):
        self.engine = PoseEngine(model, mirror=False)

    def eval(self, frame):
        nposes, pose_scores, kps_coords, kps_scores = self.engine.DetectPosesInImage(
            frame)
        kps_coords = kps_coords.astype(np.int32)

        pose_list = []
        if nposes:
            for i in range(nposes):
                if pose_scores[i] >= C_PSCORE_THRESHOLD:
                    kps_coord = kps_coords[i]
                    kps_score = kps_scores[i]

                    keypoints = {}
                    for j in range(C_NKP):
                        if kps_score[j] >= C_KP_THRESHOLD:
                            keypoints[C_KP_NAMES[j]] = Keypoint(
                                C_KP_NAMES[j], kps_coord[j], kps_score[j])
                    pose_list.append(Pose(keypoints, pose_scores[i]))
        return pose_list
Example #11
def forehead_coords(engine, img, thermal_shape, h, v, im_x=641, im_y=481):
    
    # Posenet getting keypoints
    pil_image = img
    im_x, im_y = img.size
    engine = PoseEngine('./forehead_detection/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    poses, keypoints, img, inference_time = engine.DetectPosesInImage(np.uint8(pil_image),0)
    # Getting forehead coordinates for 10 people in frame (NOTE: posenet currently processes a maximum of 10 people in the frame)
    keypoints_eyes = (keypoints[:,1,:], keypoints[:,2,:])
    lips_keypoints = []
    forehead_keypoints = []
    transformed_keypoints = []
    x_ratio = (im_x*h)/thermal_shape[0] 
    y_ratio = (im_y*v)/thermal_shape[1]
    x_bias = (im_x*h-im_x)/2
    y_bias = (im_y*v-im_y)/2
    for i in range(10):
        dist = abs(keypoints_eyes[0][i][1]-keypoints_eyes[1][i][1])
        # Format: ((left_eye_x, left_eye_y), (right_eye_x, right_eye_y+offset)) offset is the distance wanted above the eyes.
        forehead_keypoints.append([(int(keypoints_eyes[0][i][1]+dist*0.5),int(keypoints_eyes[0][i][0]+dist*0.2)), (int(keypoints_eyes[1][i][1]-dist*0.5), int(keypoints_eyes[1][i][0]-dist*1.5))])     
        # Transform coordinates
        if keypoints_eyes[0][i][1]==0:
            xL, yL, xR, yR = 0,0,0,0
        else:
            # include a hard-coded empirical correction
            e = np.e
            xL = (forehead_keypoints[i][0][0]+ x_bias) /x_ratio 
            yL = (forehead_keypoints[i][0][1]+ y_bias) /y_ratio 
            xR = (forehead_keypoints[i][1][0] + x_bias) /x_ratio
            yR = (forehead_keypoints[i][1][1] + y_bias) /y_ratio
            size_factor = abs(xL - xR) / thermal_shape[0]
            size_factor = 0.08 * e**size_factor
            v_dis = abs((xR + xL) / 2 - thermal_shape[0])
            ori_factor = abs(xR + xL + 20) / thermal_shape[0] * 1.5
            xL, xR = xL - v_dis * 0.7 / e**ori_factor, xR - v_dis * 0.7 / e**ori_factor
            yL, yR = yL + size_factor * 100, yR + size_factor * 100
            
        transformed_keypoints.append(((int(xL),int(yL)), (int(xR),int(yR))))
        
        # Find lip keypoints
        lip_dist = abs(keypoints_eyes[0][i][1]-keypoints_eyes[1][i][1])*1.5
        lips_keypoints.append([(int(keypoints_eyes[0][i][1]),int(keypoints_eyes[0][i][0]+0.5*lip_dist)), (int(keypoints_eyes[1][i][1]), int(keypoints_eyes[1][i][0]+lip_dist))])     
        
    return forehead_keypoints, transformed_keypoints, lips_keypoints
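The x_ratio/y_ratio and x_bias/y_bias expressions above map forehead keypoints from the RGB frame into the thermal frame before the empirical correction is applied. A standalone sketch of just that linear mapping; the frame size, thermal resolution and the h/v field-of-view factors below are illustrative values, not taken from the project:

# Illustrative numbers only: 641x481 RGB frame, 32x24 thermal array,
# h and v as example horizontal/vertical field-of-view ratios.
im_x, im_y = 641, 481
thermal_shape = (32, 24)
h, v = 1.2, 1.2

x_ratio = (im_x * h) / thermal_shape[0]  # RGB pixels per thermal pixel (x)
y_ratio = (im_y * v) / thermal_shape[1]  # RGB pixels per thermal pixel (y)
x_bias = (im_x * h - im_x) / 2           # half of the extra horizontal coverage
y_bias = (im_y * v - im_y) / 2           # half of the extra vertical coverage

def rgb_to_thermal(x, y):
    # Shift into the wider (h, v scaled) field of view, then rescale down
    # to thermal-sensor coordinates, as forehead_coords does above.
    return (x + x_bias) / x_ratio, (y + y_bias) / y_ratio

print(rgb_to_thermal(320, 240))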
Example #12
    def run(self):
        print('Loading model: ', self.model)
        engine = PoseEngine(self.model, mirror=self.args.mirror)

        cap = cv2.VideoCapture(0)

        last_time = time.monotonic()
        n = 0
        sum_fps = 0
        sum_process_time = 0
        sum_inference_time = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            cv2_im = frame

            start_time = time.monotonic()
            outputs, inference_time = engine.DetectPosesInImage(cv2_im)
            end_time = time.monotonic()
            n += 1
            sum_fps += 1.0 / (end_time - last_time)
            sum_process_time += 1000 * (end_time - start_time) - inference_time
            sum_inference_time += inference_time
            last_time = end_time

            text_line = 'PoseNet: %.1fms Frame IO: %.2fms TrueFPS: %.2f Nposes %d' % (
                sum_inference_time / n, sum_process_time / n, sum_fps / n,
                len(outputs))
            print(text_line)

            for pose in outputs:
                self.draw_pose(cv2_im, pose)

            cv2.imshow('frame', cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()
Example #13
def pose_inferencer(results, frameBuffer, model, device):

    pose_engine = PoseEngine(model, device)
    print("Loaded Graphs!!! (Posenet)")

    while True:

        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_pose = color_image[:, :, ::-1].copy()
        tinf = time.perf_counter()
        result_pose, inference_time = pose_engine.DetectPosesInImage(
            prepimg_pose)
        print(time.perf_counter() - tinf, "sec (Posenet)")
        results.put(result_pose)
Example #14
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror',
                        help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='1920')
    parser.add_argument('--height', help='Source height', default='1080')
    parser.add_argument('--videosrc',
                        help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264',
                        help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpg',
                        help='Use image/jpeg input',
                        action='store_true')
    parser.add_argument('--print-stats',
                        help='print timing stats',
                        action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpg:
        print('Error: both mutually exclusive options h264 and jpg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_1280_720_16_edgetpu.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    print('Toggle mode keys:')
    print(' TOGGLE_SKELETONS = s')
    print(' TOGGLE_BBOXES = b')
    print(' TOGGLE_ANON = a')
    print(' TOGGLE_HM = h')
    print(' TOGGLE_AHM = g')
    run_pipeline(Callback(engine,
                          save_frames=False,
                          print_stats=args.print_stats),
                 src_size,
                 inference_size,
                 video_src=args.videosrc,
                 h264=args.h264,
                 jpg=args.jpg,
                 mirror=args.mirror)
Example #15
    def __init__(self, config, model_name, variables):
        self.config = config
        self.model_name = model_name
        self.model_variables = variables
        # Frames Per Second
        self.fps = None
        self.w, self.h, _ = [
            int(i) for i in self.model_variables['ImageSize'].split(',')
        ]
        self.engine = PoseEngine(
            f"/project-posenet/models/mobilenet/posenet_mobilenet_v1_075_{self.h}_{self.w}_quant_decoder_edgetpu.tflite"
        )

        # Get class id from config
        self.class_id = int(self.model_variables['ClassID'])
        self.score_threshold = float(self.model_variables['MinScore'])
        self.keypoints = ('nose', 'left eye', 'right eye', 'left ear',
                          'right ear', 'left shoulder', 'right shoulder',
                          'left elbow', 'right elbow', 'left wrist',
                          'right wrist', 'left hip', 'right hip', 'left knee',
                          'right knee', 'left ankle', 'right ankle')
Example #16
def generate_results(write_csv=False,
                     visualize_model_results=False,
                     visualize_reference_results=False):
    """Generates results form a model (both from reference results or from new inference).
    Args:
      write_csv: Whether to write new inference results to a csv file or not.
      visualize_model_results: Whether to visualize the new results or not.
      visualize_reference_result: Whether to visualize the reference results or not.
    """
    image = Image.open(TEST_IMAGE).convert('RGB')
    for model_path, model_name in generate_models():
        engine = PoseEngine(model_path)
        poses, _ = engine.DetectPosesInImage(image)
        if write_csv:
            write_to_csv(model_name, poses)
        input_shape = engine.get_input_tensor_shape()[1:3]
        if visualize_model_results:
            visualize_results_from_model(model_name, image, input_shape, poses)
        if visualize_reference_results:
            visualize_results_from_reference_file(model_name, image,
                                                  input_shape)
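The flags can be combined freely; assuming test_utils provides TEST_IMAGE and the models enumerated by generate_models(), a typical call that rewrites the reference CSVs and also displays the fresh detections would be:

generate_results(write_csv=True, visualize_model_results=True)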
Example #17
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror',
                        help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='1920')
    parser.add_argument('--height', help='Source height', default='1080')
    parser.add_argument('--videosrc',
                        help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264',
                        help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpeg',
                        help='Use image/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpeg:
        print('Error: both mutually exclusive options h264 and jpeg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_1024_768_16_quant_edgetpu_decoder.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    print('Toggle mode keys:')
    print(' Toggle skeletons: ', TOGGLE_SKELETONS)
    print(' Toggle bounding boxes: ', TOGGLE_BBOXES)
    print(' Toggle anonymizer mode: ', TOGGLE_ANON)
    print(' Toggle heatmaps: ', TOGGLE_HEATMAP)
    print(' Toggle bodyparts: ', TOGGLE_BODYPARTS)
    run_pipeline(Callback(engine,
                          src_size,
                          save_every_n_frames=-1,
                          print_stats=True),
                 src_size,
                 inference_size,
                 video_src=args.videosrc,
                 h264=args.h264,
                 jpeg=args.jpeg,
                 mirror=args.mirror)
Example #18
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror',
                        help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res',
                        help='Resolution',
                        default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc',
                        help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--videotgt',
                        help='Where to write the video to',
                        default='autovideosink')
    parser.add_argument('--anonymize',
                        help='Use anonymizer mode',
                        action='store_true')
    parser.add_argument('--jpg',
                        help='Use image/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(Callback(engine, anonymize=args.anonymize),
                           src_size,
                           appsink_size,
                           use_appsrc=True,
                           mirror=args.mirror,
                           videosrc=args.videosrc,
                           jpginput=args.jpg,
                           videotgt=args.videotgt)
Example #19
def inferencer(results, frameBuffer, model, camera_width, camera_height):

    engine = None

    # Acquisition of TPU list without model assignment
    devices = edgetpu_utils.ListEdgeTpuPaths(
        edgetpu_utils.EDGE_TPU_STATE_UNASSIGNED)

    devopen = False
    for device in devices:
        try:
            engine = PoseEngine(model, device)
            devopen = True
            break
        except Exception:
            continue

    if not devopen:
        print("TPU Devices open Error!!!")
        sys.exit(1)

    print("Loaded Graphs!!! ")

    while True:

        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()

        tinf = time.perf_counter()
        result, inference_time = engine.DetectPosesInImage(prepimg)
        print(time.perf_counter() - tinf, "sec")
        results.put(result)
Example #20
def run(callback, use_appsrc=False):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--mirror",
                        help="flip video horizontally",
                        action="store_true")
    parser.add_argument("--model", help=".tflite model path.", required=False)
    parser.add_argument(
        "--res",
        help="Resolution",
        default="1280x720",
        choices=["480x360", "640x480", "1280x720"],
    )
    parser.add_argument("--videosrc",
                        help="Which video source to use",
                        default="/dev/video0")
    parser.add_argument("--h264",
                        help="Use video/x-h264 input",
                        action="store_true")
    args = parser.parse_args()

    default_model = "models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite"
    if args.res == "480x360":
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == "640x480":
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == "1280x720":
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print("Loading model: ", model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(
        partial(callback, engine),
        src_size,
        appsink_size,
        use_appsrc=use_appsrc,
        mirror=args.mirror,
        videosrc=args.videosrc,
        h264input=args.h264,
    )
Example #21
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally', action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='640')
    parser.add_argument('--height', help='Source height', default='480')
    parser.add_argument('--videosrc', help='Which video source to use', default='/dev/video0')

    parser.add_argument('--anonymize', dest='anonymize', action='store_true', help='Use anonymizer mode [--noanonymize]')
    parser.add_argument('--noanonymize', dest='anonymize', action='store_false', help=argparse.SUPPRESS)
    parser.set_defaults(anonymize=False)

    parser.add_argument('--bodyparts', dest='bodyparts', action='store_true', help='Color by bodyparts [--nobodyparts]')
    parser.add_argument('--nobodyparts', dest='bodyparts', action='store_false', help=argparse.SUPPRESS)
    parser.set_defaults(bodyparts=True)

    parser.add_argument('--h264', help='Use video/x-h264 input', action='store_true')
    parser.add_argument('--jpeg', help='Use image/jpeg input', action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpeg:
        print('Error: both mutually exclusive options h264 and jpeg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_640_480_16_quant_edgetpu_decoder.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    gstreamer.run_pipeline(Callback(engine,
                                    anonymize=args.anonymize,
                                    bodyparts=args.bodyparts),
                           src_size, inference_size,
                           mirror=args.mirror,
                           videosrc=args.videosrc,
                           h264=args.h264,
                           jpeg=args.jpeg)
Example #22
def run(callback):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res',
                        help='Resolution',
                        default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--camera',
                        help='Camera',
                        default='ip',
                        choices=['ip', 'built-in', 'realsense'])
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)
    camera = args.camera

    if camera == 'ip':
        src_size = (640, 480)
    elif camera == 'built-in':
        src_size = (1280, 720)
    elif camera == 'realsense':
        src_size = (1920, 1080)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=False)

    x_scalar = src_size[0] / appsink_size[0]
    y_scalar = src_size[1] / appsink_size[1]
    point_scalar = src_size[0] / 480

    gstreamer2.run_pipeline(
        partial(callback, engine, x_scalar, y_scalar, point_scalar), src_size,
        appsink_size, camera)
Example #23
    #('left elbow', 'left wrist', 0.26),    # avg male 0.26; avg female 0.235 (estimate)
    #('right elbow', 'right wrist', 0.26),  # avg male 0.26; avg female 0.235 (estimate)
    ('left ankle', 'left knee', 0.45),      # avg male 0.45; avg female 0.415 (valid)
    ('right ankle', 'right knee', 0.45),    # avg male 0.45; avg female 0.415 (valid)
    ('left shoulder', 'left elbow', 0.35),  # avg male 0.35; avg female 0.325 (valid)
    ('right shoulder', 'right elbow', 0.35),  # avg male 0.35; avg female 0.325 (valid)
    ('left shoulder', 'left hip', 0.56),    # avg male 0.5; avg female 0.485 (estimate)
    ('right shoulder', 'right hip', 0.56),  # avg male 0.5; avg female 0.485 (estimate)
]

MAX = 2

THRESHOLD = 0.5

# Depends on the camera that is used:
# FOCAL_LENGTH = distance_to_obj * pixel_size_of_obj / real_obj_size
# FOCAL_LENGTH = 6.6761817924503907e+02  # 616.728321429
FOCAL_LENGTH = 7.395847924773853e+02 / 2.3  # 4.533079645790183e+02

ENGINE = PoseEngine(
    os.path.join(
        path,
        'src/project-posenet/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    ))
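The FOCAL_LENGTH constant follows the pinhole relation spelled out in the comment above (focal length in pixels = distance to the object * object size in pixels / real object size). A minimal sketch of deriving such a value from one calibration measurement; the numbers are illustrative only, not the project's calibration:

def estimate_focal_length_px(distance_m, object_size_px, object_size_m):
    # Pinhole camera: f [px] = Z [m] * s [px] / S [m]
    return distance_m * object_size_px / object_size_m

# Illustrative calibration: shoulders 0.40 m wide, 2.0 m from the camera,
# measured as 148 px in the image -> roughly 740 px focal length.
focal_length_px = estimate_focal_length_px(2.0, 148, 0.40)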
Example #24
import os
import cv2
import time
import numpy as np
from picamera import PiCamera
import matplotlib.pyplot as plt
from picamera.array import PiRGBArray

from pose_engine import PoseEngine

IM_WIDTH = 640
IM_HEIGHT = 480

# load the PoseNet model
engine = PoseEngine(
    'models/mobilenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
)

# initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX

camera = PiCamera()
camera.resolution = (IM_WIDTH, IM_HEIGHT)
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(IM_WIDTH, IM_HEIGHT))
rawCapture.truncate(0)

# squat counter
counter = 0
Example #25
    # bucket_name = "your-bucket-name"
    # source_file_name = "local/path/to/file"
    # destination_blob_name = "storage-object-name"

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_file(source_file_name)

    print("File uploaded to {}.".format(destination_blob_name))


model = 'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
print('Loading model: ', model)
engine = PoseEngine(model)
input_shape = engine.get_input_tensor_shape()
inference_size = (input_shape[2], input_shape[1])
src_size = (512, 512)
bucket_name = 'YOUR-BUCKET-HERE'

cap = cv2.VideoCapture(0)


def main():
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        pil_image = Image.fromarray(frame)
        pil_image = pil_image.resize((641, 481), Image.NEAREST)
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('Inference Time: ', inference_time)
Example #26
# set measurement time in seconds
parser = argparse.ArgumentParser(description='Set measurement time')
parser.add_argument('--t',
                    type=int,
                    default=20,
                    help='set the length of time for one measurement')
parser.add_argument('--total',
                    type=int,
                    default=60,
                    help='set the total length of time for monitoring')
args = parser.parse_args()

measurement_time = args.t
measurement_time_agg = args.total
engine = PoseEngine(
    '../forehead_detection/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
)

# #################### Viola-Jones Face Detection  ##################
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# eye_cascade = cv2.CascadeClassifier('haarcascade_eye_tree_eyeglasses.xml')
# mouth_cascade = cv2.CascadeClassifier('haarcascade_smile.xml')


def viola_jones(img):
    img1 = deepcopy(img)
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    if len(faces) == 0:
        raise ValueError('No Face detected')
    for (x, y, w, h) in faces:
Example #27
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument(
        '--model',
        type=str,
        help='Pose model to use. ',
        default=
        'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    parser.add_argument('--pose3d',
                        type=str,
                        help='3D Pose model to use. ',
                        default='models/3dpose_gan_edgetpu.tflite')
    parser.add_argument('--dataset',
                        type=str,
                        help='Type of dataset. ',
                        default="CORAL")
    parser.add_argument('--rot',
                        type=int,
                        help='Number of degree to rotate in 3D pose. ',
                        default=90)
    args = parser.parse_args()

    engine = PoseEngine(args.model)
    _, image_height, image_width, _ = engine.get_input_tensor_shape()
    interpreter_3dpose = None
    if len(args.pose3d) > 0:
        interpreter_3dpose = common.make_interpreter(args.pose3d)
        interpreter_3dpose.allocate_tensors()
    print("Load all models done!")

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(cv2_im_rgb)
        pil_image = pil_image.resize((image_width, image_height), Image.NEAREST)

        start_time = time.monotonic()
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('2D Pose Inference time: %.fms (%.2f fps)' %
              (inference_time, 1000 / inference_time))

        cv2_im = draw_skel_and_kp(cv2_im, poses, args.rot, interpreter_3dpose)

        end_time = time.monotonic()
        process_time = 1000 * (end_time - start_time)
        print('3D Pose End-to-End Inference time: %.fms (%.2f fps)' %
              (process_time, 1000 / process_time))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #28
from io import BytesIO
import time 
import re
import picamera
import io

import numpy as np
from PIL import Image
from PIL import ImageDraw


from pose_engine import PoseEngine

GHOST = False
picam = picamera.PiCamera()
engine = PoseEngine('models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
#engine = PoseEngine('models/posenet_mobilenet_v1_075_721_1281_quant_decoder_edgetpu.tflite')
EDGES = (
    ('nose', 'left eye'),
    ('nose', 'right eye'),
    ('nose', 'left ear'),
    ('nose', 'right ear'),
    ('left ear', 'left eye'),
    ('right ear', 'right eye'),
    ('left eye', 'right eye'),
    ('left shoulder', 'right shoulder'),
    ('left shoulder', 'left elbow'),
    ('left shoulder', 'left hip'),
    ('right shoulder', 'right elbow'),
    ('right shoulder', 'right hip'),
    ('left elbow', 'left wrist'),
Example #29
def main():
    default_model_dir = '../all_models'
    default_model = 'posenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    default_labels = 'hand_label.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=1,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='classifier score threshold')

    args = parser.parse_args()
    W = 640
    H = 480
    src_size = (W, H)
    print('Loading Pose model {}'.format(args.model))
    engine = PoseEngine(args.model)
    cap = cv2.VideoCapture(args.camera_idx)
    #==============social distancing=========================
    dt_vector = {}
    first_frame = True
    overlap_precision = 2  # lower is better (1 to 16); controls the ellipse-mask sub-sampling

    #=======================================================
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if first_frame:
            mask = np.zeros(
                (int(frame.shape[0] / overlap_precision),
                 int(frame.shape[1] / overlap_precision), frame.shape[2]),
                dtype=np.uint8)
            first_frame = False

        background = frame

        ellipse_boxes = []

        draw_ellipse_requirements = []

        ellipse_pool = []

        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        im_size = (frame.shape[1], frame.shape[0])
        horizontal_ratio = 0.7
        vertical_ratio = 0.7
        homography_matrix = compute_homography(horizontal_ratio,
                                               vertical_ratio, im_size)
        poses, inference_time = engine.DetectPosesInImage(
            np.uint8(pil_im.resize((641, 481), Image.NEAREST)))

        skeletal_coordinates = poses

        dt_vector['ts'] = int(round(time.time() * 1000))
        dt_vector['bodys'] = []

        if type(skeletal_coordinates) is list:
            # Remove probability from joints and get a joint position list
            skeletal_coordinates = create_joint_array(skeletal_coordinates)

            print(skeletal_coordinates[0])

            is_skeletal_overlapped = np.zeros(
                np.shape(skeletal_coordinates[0])[0])

            # Evaluate ellipses for each body detected by PoseNet
            draw_ellipse_requirements = evaluate_ellipses(
                skeletal_coordinates[0], draw_ellipse_requirements,
                ellipse_boxes, ellipse_pool, mask, homography_matrix)

            # Evaluate overlapping
            is_skeletal_overlapped = evaluate_overlapping(
                ellipse_boxes, is_skeletal_overlapped, ellipse_pool)
            # Trace results over output image
            bodys = trace(cv2_im, skeletal_coordinates,
                          draw_ellipse_requirements, is_skeletal_overlapped)
            dt_vector["bodys"] = bodys

        #===============================

        cv2_sodidi = cv2_im
        cv2.imshow('frame', cv2_im)
        cv2.imshow('1', cv2_sodidi)

        #===========print mouse pos=====================
        cv2.setMouseCallback('frame', onMouse)
        posNp = np.array(posList)
        print(posNp)
        #============streamming to server==============
        img = np.hstack((cv2_im, cv2_sodidi))
        # stream img (both views side by side) instead of the raw frame
        encoded, buffer = cv2.imencode('.jpg', img)
        jpg_as_text = base64.b64encode(buffer)
        footage_socket.send(jpg_as_text)
        #=============================================
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
Example #30
import argparse
import os

import numpy as np
from PIL import Image
from PIL import ImageDraw

from pose_engine import PoseEngine

parser = argparse.ArgumentParser(description='PoseNet')
parser.add_argument('--model', type=str, default='mobilenet')
args = parser.parse_args()

os.system('wget https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/'
          'Hindu_marriage_ceremony_offering.jpg/'
          '640px-Hindu_marriage_ceremony_offering.jpg -O couple.jpg')
pil_image = Image.open('couple.jpg')

if args.model == 'mobilenet':
    pil_image = pil_image.resize((641, 481), Image.NEAREST)
    engine = PoseEngine(
        'models/mobilenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    )
else:
    pil_image = pil_image.resize((640, 480), Image.NEAREST)
    engine = PoseEngine(
        'models/resnet/posenet_resnet_50_640_480_16_quant_edgetpu_decoder.tflite'
    )

poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
print('Inference time: %.fms' % inference_time)

output = pil_image.copy()
draw = ImageDraw.Draw(output)
for pose in poses:
    if pose.score < 0.4: continue
    print('\nPose Score: ', pose.score)