def detect(num_frames):
    """Face detection camera inference example."""
    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera:
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(num_frames):
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    x, y, width, height = face.bounding_box
                    annotator.bounding_box(
                        (scale_x * x, scale_y * y,
                         scale_x * (x + width), scale_y * (y + height)),
                        fill=0)
                annotator.update()
                print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f' %
                      (inference.count, inference.rate, len(faces),
                       avg_joy_score(faces)))

        camera.stop_preview()

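# The snippets in this section omit their module imports and the small
# avg_joy_score() helper they call. The following is a minimal sketch of the
# preamble they appear to assume, based on the AIY Vision Kit and picamera
# APIs; the exact module paths depend on the installed AIY Projects release
# and are assumptions, not taken from the original sources.
import argparse

from picamera import PiCamera

from aiy.vision.annotator import Annotator
from aiy.vision.inference import CameraInference
from aiy.vision.models import face_detection


def avg_joy_score(faces):
    """Average joy score of the detected faces, or 0.0 if there are none."""
    if faces:
        return sum(face.joy_score for face in faces) / len(faces)
    return 0.0
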
def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    with PiCamera() as camera:
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference runs on.
        camera.sensor_mode = 4

        # Scaled and cropped resolution. If different from sensor mode implied
        # resolution, inference results must be adjusted accordingly. This is
        # true in particular when camera.start_recording is used to record an
        # encoded h264 video stream as the Pi encoder can't encode all native
        # sensor resolutions, or a standard one like 1080p may be desired.
        camera.resolution = (1640, 1232)

        # Start the camera stream.
        camera.framerate = 30
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for i, result in enumerate(inference.run()):
                if i == args.num_frames:
                    break
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()
                print('Iteration #%d: num_faces=%d' % (i, len(faces)))

        camera.stop_preview()

def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera:
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                faces = face_detection.get_faces(result)
                print(faces)
                annotator.clear()
                for index, face in enumerate(faces):
                    sio.emit('movement', {'index': index, 'score': face.face_score})
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()
                print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f' %
                      (inference.count, inference.rate, len(faces),
                       avg_joy_score(faces)))

        camera.stop_preview()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera:
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(object_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                objects = object_detection.get_objects(result)
                annotator.clear()
                for obj in objects:
                    rect = transform(obj.bounding_box)
                    annotator.bounding_box(rect, fill=0)
                    loc = (rect[0] + 4, rect[1])
                    annotator.text(loc, objectLabel(obj.kind))
                annotator.update()
                # print('#%05d (%5.2f fps): num_objects=%d, objects=%s' %
                #       (inference.count, inference.rate, len(objects), objects))
                if len(objects) > 0:
                    print(f"num_objects={len(objects)}, "
                          f"objects={[objectLabel(obj.kind) for obj in objects]}")

        camera.stop_preview()

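# The variant above calls an objectLabel() helper that is not part of this
# excerpt. aiy.vision.models.object_detection reports obj.kind as an integer
# class id, so the helper presumably maps that id to a readable name. A
# hypothetical reconstruction, assuming the model's standard background /
# person / cat / dog classes:
_OBJECT_LABELS = {0: 'BACKGROUND', 1: 'PERSON', 2: 'CAT', 3: 'DOG'}


def objectLabel(kind):
    """Map an object_detection class id to a label (assumed mapping)."""
    return _OBJECT_LABELS.get(kind, 'UNKNOWN')
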
def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera:
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()
                print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f' %
                      (inference.count, inference.rate, len(faces),
                       avg_joy_score(faces)))

        camera.stop_preview()

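# Each variant defines main() but the sources here do not show how it is
# invoked. If these run as standalone scripts, the usual entry-point guard
# would look like this (assumed, not present in the originals):
if __name__ == '__main__':
    # e.g. python3 face_detection_camera.py --num_frames 100
    main()
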
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-f', type=int, dest='num_frames', default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument(
        '--num_pics', '-p', type=int, dest='num_pics', default=-1,
        help='Sets the max number of pictures to take, otherwise runs forever.')
    args = parser.parse_args()

    with PiCamera() as camera, PrivacyLed(Leds()):
        # See the Raspicam documentation for mode and framerate limits:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # Set to the highest resolution possible at 16:9 aspect ratio.
        camera.sensor_mode = 4
        camera.resolution = (1640, 1232)
        camera.start_preview(fullscreen=True)

        with CameraInference(pikachu_object_detection.model()) as inference:
            print("Camera inference started")
            player.play(*MODEL_LOAD_SOUND)
            last_time = time()
            pics = 0
            save_pic = False
            enable_label = True

            # Annotator renders in software so use a smaller size and scale results
            # for increased performance.
            annotator = Annotator(camera, dimensions=(320, 240))
            scale_x = 320 / 1640
            scale_y = 240 / 1232

            # Incoming boxes are of the form (x, y, width, height). Scale and
            # transform to the form (x1, y1, x2, y2).
            def transform(bounding_box):
                x, y, width, height = bounding_box
                return (scale_x * x, scale_y * y,
                        scale_x * (x + width), scale_y * (y + height))

            def leftCorner(bounding_box):
                x, y, width, height = bounding_box
                return (scale_x * x, scale_y * y)

            def truncateFloat(value):
                return '%.3f' % value

            for f, result in enumerate(inference.run()):
                print("Inside the inference loop..")
                print(os.getcwd() + '/pikachu_detector.binaryproto')
                annotator.clear()
                detections = enumerate(pikachu_object_detection.get_objects(result, 0.3))
                for i, obj in detections:
                    print("Inside the detection loop..")
                    print('%s', obj.label)
                    annotator.bounding_box(transform(obj.bounding_box), fill=0)
                    if enable_label:
                        annotator.text(
                            leftCorner(obj.bounding_box),
                            obj.label + " - " + str(truncateFloat(obj.score)))
                    print('%s Object #%d: %s' %
                          (strftime("%Y-%m-%d-%H:%M:%S"), i, str(obj)))
                    x, y, width, height = obj.bounding_box
                    if obj.label == 'PIKACHU':
                        save_pic = True
                        # player.play(*BEEP_SOUND)

                # Save the image if one or more Pikachus were detected.
                if save_pic:
                    # Save the clean image.
                    # camera.capture("images/image_%s.jpg" % strftime("%Y%m%d-%H%M%S"))
                    pics += 1
                    save_pic = False

                # if f == args.num_frames or pics == args.num_pics:
                #     break

                now = time()
                duration = now - last_time
                annotator.update()

                # The Movidius chip runs at 35 ms per image. Then there is some
                # additional overhead for the object detector to interpret the
                # result and to save the image. If the total process time is
                # running slower than 50 ms it could be a sign the CPU is
                # getting overrun.
                # if duration > 0.50:
                #     print("Total process time: %s seconds. Bonnet inference time: %s ms " %
                #           (duration, result.duration_ms))

                last_time = now

        camera.stop_preview()

def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument('--sparse', '-s', action='store_true', default=False,
                        help='Use sparse tensors.')
    parser.add_argument('--threshold', '-t', type=float, default=0.3,
                        help='Detection probability threshold.')
    parser.add_argument('--cam_width', type=int, default=1640, help='Camera Width')
    parser.add_argument('--cam_height', type=int, default=1232, help='Camera Height')
    parser.add_argument('--fps', type=int, default=30, help='Camera Frames Per Second')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4,
                  resolution=(args.cam_width, args.cam_height),
                  framerate=args.fps) as camera:
        camera.start_preview()

        width = args.cam_width
        height = args.cam_height

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / width
        scale_y = 240 / height

        size = min(width, height)
        offset = ((width - size) / 2, (height - size) / 2)

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        while True:
            with CameraInference(face_detection.model()) as inference, \
                 CameraInference(object_detection.model()) as inference2:
                for result in inference.run(args.num_frames):
                    faces = face_detection.get_faces(result)
                    annotator.clear()
                    for face in faces:
                        annotator.bounding_box(transform(face.bounding_box), fill=0)
                    # annotator.update()
                    print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f' %
                          (inference.count, inference.rate, len(faces),
                           avg_joy_score(faces)))

                for result in inference2.run(args.num_frames):
                    objects = object_detection.get_objects(result, args.threshold, offset)
                    # annotator.clear()
                    for i, obj in enumerate(objects):
                        annotator.bounding_box(transform(obj.bounding_box), fill=0)
                        print('Object #%d: %s' % (i, obj))
                    annotator.update()

        camera.stop_preview()

def main():
    def face_data(face):
        x, y, width, height = face.bounding_box
        x_mean = int(x + width / 2)
        angle = atan2(x_mean - x_center, focal_length)
        distance = 0
        if width > 0:
            distance = focal_length * real_face_width_inch / width
        return angle, distance

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    focal_length = 1320  # focal length in pixels for 1640x1232 resolution - found by calibration
    camera_resolution = (1640, 1232)
    x_center = int(camera_resolution[0] / 2)
    real_face_width_inch = 11  # width/height of bounding box of human face in inches
    min_angle = atan2(-x_center, focal_length)  # min angle where face can be detected (leftmost area) in radians
    max_angle = atan2(x_center, focal_length)
    face_detected_on_prev_frame = False
    previous_angle = 0

    LOAD_SOUND = ('G5e', 'f5e', 'd5e', 'A5e', 'g5e', 'E5e', 'g5e', 'C6e')
    BUZZER_GPIO = 22

    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera, \
         Leds() as leds:
        leds.update(Leds.privacy_on())

        myCorrectionMin = 0.2
        myCorrectionMax = 0.2
        maxPW = (2.0 + myCorrectionMax) / 1000
        minPW = (1.0 - myCorrectionMin) / 1000

        camera.start_preview()
        tone_player = TonePlayer(BUZZER_GPIO, bpm=70)
        tone_player.play(*LOAD_SOUND)

        # servo = AngularServo(PIN_A, min_pulse_width=minPW, max_pulse_width=maxPW)
        servo = AngularServo(PIN_A, max_pulse_width=maxPW)
        # servo = AngularServo(PIN_A)

        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for i, result in enumerate(inference.run()):
                if i == args.num_frames:
                    break
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()
                print('Iteration #%d: num_faces=%d' % (i, len(faces)))

                if faces:
                    previous_angle = 0
                    leds.update(Leds.rgb_on(Color.BLUE))
                    if face_detected_on_prev_frame:
                        angle, distance = face_data(face)
                        # if angle < min_angle:
                        #     angle = min_angle
                        # if angle > max_angle:
                        #     angle = max_angle
                        servo.angle = angle * (-100)
                        previous_angle = angle * (-100)
                        print('Angle:' + str(angle))
                        sleep(.05)
                    face_detected_on_prev_frame = True
                else:
                    leds.update(Leds.rgb_on(Color.RED))
                    if not face_detected_on_prev_frame:
                        servo.angle = previous_angle
                        sleep(.05)
                    face_detected_on_prev_frame = False

        camera.stop_preview()

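# The servo-tracking variants above and below additionally drive a servo on
# the Vision Bonnet's PIN_A header, play tones on the buzzer, and control the
# onboard LEDs. A sketch of the extra imports they appear to rely on, under
# the assumption that they target gpiozero and the AIY helper modules; these
# module paths are assumptions, not taken from the original sources.
from math import atan2
from time import sleep

from gpiozero import AngularServo, Servo

from aiy.leds import Color, Leds
from aiy.pins import PIN_A
from aiy.toneplayer import TonePlayer
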
def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera, \
         Leds() as leds:
        leds.update(Leds.privacy_on())
        leds.update(Leds.rgb_on(Color.BLUE))
        camera.start_preview()
        tone_player = TonePlayer(BUZZER_GPIO, bpm=70)
        # tone_player.play(*LOAD_SOUND)

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                    x, y, width, height = face.bounding_box
                annotator.update()

                if len(faces) >= 1:
                    print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f, '
                          'x=%.2f, y=%.2f, width=%.2f, height=%.2f' %
                          (inference.count, inference.rate, len(faces),
                           avg_joy_score(faces), x, y, width, height))
                    distance = focal_length * real_face_width_inches / width
                    if x > 0:
                        alpha = x / float(1200)
                        brightness = 254 - (distance * 2)
                    else:
                        alpha = .5
                        brightness = 254
                    try:
                        leds.update(Leds.rgb_on(Color.blend(Color.BLUE, Color.RED, alpha)))
                        b.set_light(2, 'bri', brightness)
                    except Exception:
                        pass
                    camera.annotate_text = '%d inches' % distance

        camera.stop_preview()

def objdet():
    with CameraInference(ObjectDetection.model()) as inference:
        print("Camera inference started")
        player.play(*MODEL_LOAD_SOUND)
        last_time = time()
        pics = 0
        save_pic = False
        enable_label = True

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        def leftCorner(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y)

        def truncateFloat(value):
            return '%.3f' % value

        for f, result in enumerate(inference.run()):
            annotator.clear()
            detections = enumerate(ObjectDetection.get_objects(result, 0.3))
            for i, obj in detections:
                print('%s', obj.label)
                annotator.bounding_box(transform(obj.bounding_box), fill=0)
                if enable_label:
                    annotator.text(
                        leftCorner(obj.bounding_box),
                        obj.label + " - " + str(truncateFloat(obj.score)))
                print('%s Object #%d: %s' %
                      (strftime("%Y-%m-%d-%H:%M:%S"), i, str(obj)))
                x, y, width, height = obj.bounding_box
                if obj.label == 'chair':
                    # dt = datetime.datetime.now()
                    # os.system("ffplay -nodisp -autoexit /home/pi/AIY-projects-python/src/LorecObjectSoundFiles/insan.mp3")
                    # query = ("INSERT INTO Log (Time, Location, GlassNameDbid, ModulDbid, Screenshot, Tag, Distance) VALUES ('" + dt + "', 'Ankara', '1', '2', 'No Screenshot', 'Insan', '150')")
                    # save_pic = True
                    player.play(*BEEP_SOUND)
                # elif obj.label == 'tvmonitor':
                #     os.system("ffplay -nodisp -autoexit /home/pi/AIY-projects-python/src/LorecObjectSoundFiles/Ekran.mp3")

            # Save the image.
            if save_pic:
                # Save the clean image.
                camera.capture("images/image_%s.jpg" % strftime("%Y%m%d-%H%M%S"))
                pics += 1
                save_pic = False

            if f == args.num_frames or pics == args.num_pics:
                break

            annotator.update()
            now = time()
            duration = now - last_time

            # The Movidius chip runs at 35 ms per image. Then there is some
            # additional overhead for the object detector to interpret the
            # result and to save the image. If the total process time is
            # running slower than 50 ms it could be a sign the CPU is getting
            # overrun.
            if duration > 0.50:
                print("Total process time: %s seconds. Bonnet inference time: %s ms " %
                      (duration, result.duration_ms))
            last_time = now

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    with PiCamera() as camera:
        camera.sensor_mode = 4
        camera.resolution = (1640, 1232)
        camera.framerate = 30
        camera.start_preview()

        servo = Servo(PIN_A, min_pulse_width=.0005, max_pulse_width=.0019)
        servo.mid()
        position = 0
        zero_counter = 0

        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        with CameraInference(face_detection.model()) as inference:
            for i, result in enumerate(inference.run()):
                if i == args.num_frames:
                    break
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()
                print('Iteration #%d: num_faces=%d' % (i, len(faces)))

                if faces:
                    face = faces[0]
                    x, y, width, height = face.bounding_box
                    print(' : Face is at %d' % x)
                    if x < 300:
                        print(' : Face left of center')
                        position = position - 0.1
                        if position < -1:
                            position = -0.99
                    elif x > 500:
                        print(' : Face right of center')
                        position = position + 0.1
                        if position > 1:
                            position = 0.99
                    else:
                        print(' : Face in CENTER of image')
                        # Face centered: hold the current position.
                    servo.value = position
                else:
                    zero_counter = zero_counter + 1
                    if zero_counter == 100:
                        servo.mid()
                        position = 0
                        print(' :Ignoring you')
                        zero_counter = 0

        camera.stop_preview()

def monitor_run(num_frames, preview_alpha, image_format, image_folder,
                enable_streaming, streaming_bitrate, mdns_name, width, height,
                fps, region, enter_side, use_annotator, url, uname, pw,
                image_dir, dev):
    # Sign the device in and get an access and a refresh token, if a username
    # and password are provided.
    access_token = None
    refresh_token = None
    tokens = None
    start_token_timer = timer()
    if uname is not None and pw is not None:
        try:
            tokens = connect_to_server(url, uname, pw)
            access_token = tokens['access']
            refresh_token = tokens['refresh']
            print(access_token)
            print(refresh_token)
        except Exception:
            print("Could not get tokens from the server.")

    # Location where we want to send the faces + status for classification on
    # the web server.
    classification_path = url + "/" + image_dir

    done = threading.Event()

    def stop():
        logger.info('Stopping...')
        done.set()

    # Get the region center point and two corners.
    r_center = (region[0] + region[2] / 2, region[1] + region[3] / 2)
    r_corners = (region[0], region[0] + region[2],
                 region[1], region[1] + region[3])

    signal.signal(signal.SIGINT, lambda signum, frame: stop())
    signal.signal(signal.SIGTERM, lambda signum, frame: stop())

    logger.info('Starting...')
    with contextlib.ExitStack() as stack:
        leds = stack.enter_context(Leds())
        board = stack.enter_context(Board())
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))

        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference runs on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, framerate=fps, resolution=(width, height)))
        stack.enter_context(PrivacyLed(leds))

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = None
        if use_annotator:
            annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / width
        scale_y = 240 / height

        server = None
        if enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=streaming_bitrate,
                                mdns_name=mdns_name))

        def model_loaded():
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(camera)

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        board.button.when_pressed = take_photo

        joy_moving_average = moving_average(10)
        joy_moving_average.send(None)  # Initialize.
        joy_threshold_detector = threshold_detector(JOY_SCORE_LOW, JOY_SCORE_HIGH)
        joy_threshold_detector.send(None)  # Initialize.

        previous_faces3 = []
        previous_faces2 = []
        previous_faces = []
        num_faces = 0

        for faces, frame_size in run_inference(num_frames, model_loaded):
            # If 4 minutes have passed since the access token was obtained,
            # refresh the token.
            end_token_timer = timer()  # time in seconds
            if refresh_token is not None and end_token_timer - start_token_timer >= 240:
                tokens = refresh_access_token(url, refresh_token)
                access_token = tokens["access"]

            photographer.update_faces((faces, frame_size))

            joy_score = joy_moving_average.send(average_joy_score(faces))
            animator.update_joy_score(joy_score)
            event = joy_threshold_detector.send(joy_score)
            if event == 'high':
                logger.info('High joy detected.')
                player.play(JOY_SOUND)
            elif event == 'low':
                logger.info('Low joy detected.')
                player.play(SAD_SOUND)

            num_previous_faces = num_faces

            if use_annotator:
                annotator.clear()
                annotator.bounding_box(transform(region, scale_x, scale_y), fill=0)

            num_faces = 0
            tmp_arr = []
            faces_in_region = []
            photo_taken = False
            image = None

            for face in faces:
                face_center = (face.bounding_box[0] + face.bounding_box[2] / 2,
                               face.bounding_box[1] + face.bounding_box[3] / 2)
                # Check if the center of the face is in our region of interest.
                if r_corners[0] <= face_center[0] <= r_corners[1] and \
                   r_corners[2] <= face_center[1] <= r_corners[3]:
                    if not photo_taken:
                        stream = io.BytesIO()
                        with stopwatch('Taking photo'):
                            camera.capture(stream, format=image_format,
                                           use_video_port=True)
                        stream.seek(0)
                        image = Image.open(stream)
                        photo_taken = True
                    num_faces = num_faces + 1
                    faces_in_region.append(face)
                    # Creates a tuple (image of the face, entering/exiting status).
                    tmp_arr.append([
                        crop_face(image, image_format, image_folder, face.bounding_box),
                        get_status(face.bounding_box, r_center, enter_side)
                    ])
                    if use_annotator:
                        # Draw a box around the face.
                        annotator.bounding_box(
                            transform(face.bounding_box, scale_x, scale_y), fill=0)

            if server:
                server.send_overlay(
                    svg_overlay(faces_in_region, frame_size, region, joy_score))

            if use_annotator:
                annotator.update()

            if num_faces < num_previous_faces:
                # A face left the region: loop through previous faces and send
                # face data, image and status.
                print(" A face left the region: send previous face data")
                # if not use_annotator:
                #     take_photo()
                faces_to_use = previous_faces
                if previous_faces2:
                    faces_to_use = previous_faces2
                if previous_faces3:
                    faces_to_use = previous_faces3
                for face in faces_to_use:
                    print(classification_path, face, access_token)
                    if access_token is not None:
                        print("sent face with access token")
                        send_face(classification_path, face, access_token, dev)

            previous_faces3 = previous_faces2
            previous_faces2 = previous_faces
            previous_faces = tmp_arr

            if done.is_set():
                break

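# monitor_run() above calls a three-argument transform(bounding_box, scale_x,
# scale_y) whose definition is not part of this excerpt. Based on the
# two-argument version the other snippets define, it presumably performs the
# same (x, y, width, height) -> (x1, y1, x2, y2) scaling with the factors
# passed in. A hypothetical reconstruction:
def transform(bounding_box, scale_x, scale_y):
    """Scale an (x, y, width, height) box to annotator (x1, y1, x2, y2) coords."""
    x, y, width, height = bounding_box
    return (scale_x * x, scale_y * y,
            scale_x * (x + width), scale_y * (y + height))
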
def main():
    """Face detection camera inference example."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument('--sparse', '-s', action='store_true', default=False,
                        help='Use sparse tensors.')
    parser.add_argument('--threshold', '-t', type=float, default=0.3,
                        help='Detection probability threshold.')
    parser.add_argument('--cam_width', type=int, default=1640, help='Camera Width')
    parser.add_argument('--cam_height', type=int, default=1232, help='Camera Height')
    parser.add_argument('--fps', type=int, default=30, help='Camera Frames Per Second')
    parser.add_argument(
        '--region', nargs=4, type=int, default=[1040, 600, 600, 632],
        help='Region for entering/exiting face detection: x, y, width, height')
    parser.add_argument(
        '--enter_side', type=int, default=1,
        help='Used to determine which side of the region should be considered '
             '"entering": 1 = right, 0 = left')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4,
                  resolution=(args.cam_width, args.cam_height),
                  framerate=args.fps) as camera:
        camera.start_preview()

        # Get the camera width and height.
        width = args.cam_width
        height = args.cam_height

        # Get the region center point and two corners.
        r_center = (args.region[0] + args.region[2] / 2,
                    args.region[1] + args.region[3] / 2)
        r_corners = (args.region[0], args.region[0] + args.region[2],
                     args.region[1], args.region[1] + args.region[3])

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / width
        scale_y = 240 / height

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        # Both incoming boxes and the user-defined region are of the form
        # (x, y, width, height). Determines whether the face is entering or
        # exiting the region.
        def get_status(bounding_box):
            face_center = (bounding_box[0] + bounding_box[2] / 2,
                           bounding_box[1] + bounding_box[3] / 2)
            if face_center[0] > r_center[0]:
                if args.enter_side == 0:
                    return True
                else:
                    return False
            else:
                if args.enter_side == 0:
                    return False
                else:
                    return True

        previous_faces = []
        num_previous_faces = 0
        num_faces = 0
        faces = []
        stream = io.BytesIO()

        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                # Get the frame as a picture.
                num_previous_faces = num_faces
                faces = face_detection.get_faces(result)
                annotator.clear()
                annotator.bounding_box(transform(args.region), fill=0)
                num_faces = 0
                tmp_arr = []
                for face in faces:
                    face_center = (face.bounding_box[0] + face.bounding_box[2] / 2,
                                   face.bounding_box[1] + face.bounding_box[3] / 2)
                    # Check if the center of the face is in our region of interest:
                    # draw a box around the face.
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()

        camera.stop_preview()

def main():
    env = Env()
    env.read_env()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames', '-n', type=int, dest='num_frames', default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    args = parser.parse_args()

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # This is the resolution inference runs on.
    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30) as camera:
        camera.start_preview()

        # Annotator renders in software so use a smaller size and scale results
        # for increased performance.
        annotator = Annotator(camera, dimensions=(320, 240))
        scale_x = 320 / 1640
        scale_y = 240 / 1232

        # Incoming boxes are of the form (x, y, width, height). Scale and
        # transform to the form (x1, y1, x2, y2).
        def transform(bounding_box):
            x, y, width, height = bounding_box
            return (scale_x * x, scale_y * y,
                    scale_x * (x + width), scale_y * (y + height))

        joy_counter = 0
        with CameraInference(face_detection.model()) as inference:
            for result in inference.run(args.num_frames):
                faces = face_detection.get_faces(result)
                annotator.clear()
                for face in faces:
                    annotator.bounding_box(transform(face.bounding_box), fill=0)
                annotator.update()

                if len(faces) > 0:
                    if avg_joy_score(faces) > 0.8:
                        if joy_counter < 0:
                            joy_counter = 0
                        else:
                            joy_counter += 1
                    if avg_joy_score(faces) < 0.1:
                        if joy_counter > 0:
                            joy_counter = 0
                        else:
                            joy_counter -= 1
                    if joy_counter > 20:
                        print("Happy")
                        joy_counter = 0
                    if joy_counter < -20:
                        print("Sad")
                        request_url = urllib.request.urlopen(
                            "https://www.reddit.com/r/dogpictures/random.json")
                        result = json.loads(request_url.read().decode())[0]["data"]["children"][0]["data"]["url"]
                        message = Mail(
                            from_email='*****@*****.**',
                            to_emails='*****@*****.**',
                            subject='Sending with Twilio SendGrid is Fun',
                            html_content='<img src=' + result + '>')
                        try:
                            sg = SendGridAPIClient(env.str('SENDGRID_API_KEY'))
                            response = sg.send(message)
                            print(response.status_code)
                            # print(response.body)
                            # print(response.headers)
                        except Exception as e:
                            print(e)
                        joy_counter = 0
                else:
                    joy_counter = 0

                # print('#%05d (%5.2f fps): num_faces=%d, avg_joy_score=%.2f' %
                #       (inference.count, inference.rate, len(faces), avg_joy_score(faces)))

        camera.stop_preview()

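# The SendGrid variant above reads its API key from the environment via
# environs and builds the mail with the sendgrid helpers. A sketch of the
# imports and environment setup it appears to assume; the package names
# follow the environs and sendgrid libraries the code references and are
# assumptions, not taken from the original source.
import json
import urllib.request

from environs import Env
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail

# The API key is expected in the environment (or a .env file read by
# env.read_env()), e.g. exported as SENDGRID_API_KEY before the script runs.
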