class BirdInference():
    """Bird classifier that only trusts a label confirmed on a mirrored image.

    Wraps an ``ImageInference`` session loaded with the
    ``inaturalist_classification.BIRDS`` model.
    """

    def __init__(self):
        """Announce start-up and open the inference session."""
        print("starting inference..")
        birds_model = inaturalist_classification.model(
            inaturalist_classification.BIRDS)
        self.inference = ImageInference(birds_model)

    def run(self, image):
        """Classify ``image`` and confirm the label on its mirror image.

        The intermediate results are kept on the instance as ``result``,
        ``bird_class`` and, when the second pass runs, ``result_2`` and
        ``bird_class_2``.

        Args:
            image: Image handed to the inference session; it must also be
                accepted by ``ImageOps.mirror``.

        Returns:
            The top entry of the first pass (presumably a label/score pair --
            confirm against ``get_classes``) when both passes agree on the
            label, otherwise ``None``.
        """
        self.result = self.inference.run(image)
        self.bird_class = inaturalist_classification.get_classes(
            self.result, top_k=1, threshold=0.8)

        # Give up early when nothing cleared the threshold or the best guess
        # is just the background class.
        if len(self.bird_class) == 0 or self.bird_class[0][0] == 'background':
            return None

        # Second opinion: a genuine detection should survive a horizontal flip.
        flipped = ImageOps.mirror(image)
        self.result_2 = self.inference.run(flipped)
        self.bird_class_2 = inaturalist_classification.get_classes(
            self.result_2, top_k=1, threshold=0.8)
        if len(self.bird_class_2) == 0:
            return None

        labels_agree = self.bird_class_2[0][0] == self.bird_class[0][0]
        return self.bird_class[0] if labels_agree else None
class ImgCap(io.IOBase):
    '''
    Capturing Image from a Raspicam (V2.1)

    File-like sink: each buffer handed to write() is interpreted as one raw
    frame of frameHeight x frameWidth pixels with 3 bytes per pixel, kept in
    self.output and passed through the inference model.
    '''

    def __init__(self, model, frameWidth=240, frameHeight=240, DEBUG=False):
        '''
        model: model descriptor handed to ImageInference.
        frameWidth / frameHeight: frame size in pixels expected by write().
        DEBUG: when True, write() prints the result, frame shape and rate.
        '''
        # Init the stuff we are inheriting from
        super().__init__()

        self.DEBUG = DEBUG

        self.inference = ImageInference(model)

        # Set video frame parameters
        self.frameWidth = frameWidth
        self.frameHeight = frameHeight

        self.prev_time = time.time()

        self.output = None

    def writable(self):
        '''
        To be a nice file, you must have this method
        '''
        return True

    def write(self, b):
        '''
        Here is where the image data is received and made available at self.output

        Processing is best effort: any Exception is printed and the frame is
        dropped so the producer keeps streaming. Always returns len(b).
        '''
        try:
            # b is the numpy array of the image, 3 bytes of color depth
            self.output = np.reshape(np.frombuffer(b, dtype=np.uint8),
                                     (self.frameHeight, self.frameWidth, 3))

            # The crop offset is not needed here, only the cropped image.
            image_center, _offset = crop_center(Image.fromarray(self.output))
            result = self.inference.run(image_center)

            if self.DEBUG:
                print(f"ImgCap - Inference result: {result}")
                print(f"ImgCap - Image.shape {self.output.shape}")
                print(
                    f"ImgCap - Running at {1/(time.time()-self.prev_time):2.2f} Hz"
                )

            self.prev_time = time.time()

        except Exception as e:
            print("ImgCap error: {}".format(e))

        # Deliberately NOT inside a `finally`: returning from `finally` would
        # discard KeyboardInterrupt/SystemExit raised while processing a frame.
        return len(b)
class ImgCap(io.IOBase):
    '''
    Capturing Image from a Raspicam (V2.1)

    File-like sink: each buffer handed to write() is interpreted as one raw
    frame of frameHeight x frameWidth pixels with 3 bytes per pixel, kept in
    self.output, passed through the inference model and post-processed into
    detection boxes, scores and classes.
    '''

    # Anchors file used when the caller does not supply one.
    DEFAULT_ANCHORS_PATH = (
        "/opt/aiy/models/mobilenet_ssd_256res_0.125_person_cat_dog_anchors.txt"
    )

    def __init__(self, model, frameWidth=240, frameHeight=240, DEBUG=False,
                 anchors_path=None):
        '''
        model: model descriptor handed to ImageInference.
        frameWidth / frameHeight: frame size in pixels expected by write().
        DEBUG: when True, write() prints boxes, scores, frame shape and rate.
        anchors_path: text file with the anchors, loaded with np.genfromtxt;
            defaults to DEFAULT_ANCHORS_PATH.
        '''
        # Init the stuff we are inheriting from
        super().__init__()

        self.inference = ImageInference(model)

        if anchors_path is None:
            anchors_path = self.DEFAULT_ANCHORS_PATH
        self.ANCHORS = np.genfromtxt(anchors_path)

        self.DEBUG = DEBUG

        # Set video frame parameters
        self.frameWidth = frameWidth
        self.frameHeight = frameHeight

        self.prev_time = time.time()

        self.output = None

    def writable(self):
        '''
        To be a nice file, you must have this method
        '''
        return True

    def write(self, b):
        '''
        Here is where the image data is received and made available at self.output

        Processing is best effort: any Exception is printed (with the line it
        came from) and the frame is dropped so the producer keeps streaming.
        Always returns len(b).
        '''
        try:
            # b is the numpy array of the image, 3 bytes of color depth
            self.output = np.reshape(np.frombuffer(b, dtype=np.uint8),
                                     (self.frameHeight, self.frameWidth, 3))

            # The crop offset is not needed here, only the cropped image.
            image_center, _offset = crop_center(Image.fromarray(self.output))
            result = self.inference.run(image_center)

            # The weird shapes used for concat and concat_1 are just copying the output tensors
            # shapes when using the model directly from tensorflow.
            # => #classes: number of classes during training (the number used in the config file)
            concat = np.asarray(result.tensors['concat'].data).reshape(
                (1, 1278, 1, 4))  # (1, 1278*#classes, 1, 4)
            concat_1 = np.asarray(result.tensors['concat_1'].data).reshape(
                (1, 1278, 2))  # (1, 1278, #classes + 1)

            #
            # Ideally, it would be nice to use np.frombuffer instead of creating new arrays. Another option is
            # to create the arrays inside init and just repopulate (so no new memory allocation) because the
            # tensors have fixed sizes.
            #
            # Increasing score_threshold will make things faster because the Non Maximum Suppression
            # stuff will work on a much smaller set of boxes.
            detection_boxes, detection_scores, detection_classes = process_output_tensor(
                concat,
                concat_1,
                self.ANCHORS,
                classes=[1],
                IoU_thres=0.5,
                raw_boxes=False,
                score_threshold=0.3)

            if self.DEBUG:
                print(f"Boxes: {detection_boxes}")
                print(f"Scores: {detection_scores}")
                print("ImgCap - Inference done!")
                print("ImgCap - Image.shape {}".format(self.output.shape))
                print("ImgCap - Running at {:2.2f} Hz".format(
                    1 / (time.time() - self.prev_time)))

            self.prev_time = time.time()

        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            print(exc_type, exc_obj, exc_tb.tb_lineno)
            print("ImgCap error: {}".format(e))

        # Deliberately NOT inside a `finally`: returning from `finally` would
        # discard KeyboardInterrupt/SystemExit raised while processing a frame.
        return len(b)
# Fragment of a single-image classification script: `parser` is created before
# this point (not visible here). NOTE(review): the nesting under
# `if inference:` is reconstructed from a flattened source -- confirm.
parser.add_argument('--output_key', required=True)
args = parser.parse_args()

image = Image.open(args.input)
width, height = image.size
# Describe the model: one square 3-channel input of side `args.input_size`,
# with the compute graph loaded from `args.model_path`.
model = ModelDescriptor(name=args.model_name,
                        input_shape=(1, args.input_size, args.input_size, 3),
                        input_normalizer=(128, 128),
                        compute_graph=utils.load_compute_graph(
                            args.model_path))
inference = ImageInference(model)
if inference:
    # Time a single inference pass.
    starttime = datetime.now()
    result = inference.run(image)
    deltatime = datetime.now() - starttime
    # Prints whole seconds, then the sub-second remainder in milliseconds.
    print(
        str(deltatime.seconds) + "s " +
        str(deltatime.microseconds / 1000) + "ms")
    # The script expects exactly one output tensor, read via --output_key.
    assert len(result.tensors) == 1
    tensor = result.tensors[args.output_key]
    probs = tuple(tensor.data)
    # Keep (index, value) pairs above 0.1, highest first, at most five.
    pairs = [pair for pair in enumerate(probs) if pair[1] > 0.1]
    pairs = sorted(pairs, key=lambda pair: pair[1], reverse=True)
    pairs = pairs[0:5]
    # Each label entry is joined with '/'; presumably a sequence of name
    # parts -- confirm against utils.load_labels.
    _CLASSES = utils.load_labels(args.label_path)
    classes = [('/'.join(_CLASSES[index]), prob) for index, prob in pairs]
def main():
    """Face detection camera inference example.

    Reads the Fernet key from ``./key.key``, sends the encrypted ``VISION``
    validation message over the socket and then, for as long as
    ``get_order`` says to keep running, captures JPEG frames and runs face
    detection on them. When at least one face covers 6% or more of the frame,
    the annotated image is saved, the captured JPEG is sent over the socket
    and the saved image is e-mailed with ``mpack``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_frames',
        '-n',
        type=int,
        dest='num_frames',
        default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    # NOTE: argument parsing is disabled, so --num_frames is currently ignored.
    # args = parser.parse_args()

    with open("./key.key", "rb") as fp:
        key = fp.read()
    crypt = Fernet(key)

    client = connect_to_socket()
    validation_msg = crypt.encrypt(b"VISION")
    client.sendall(validation_msg)

    # Forced sensor mode, 1640x1232, full FoV. See:
    # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
    # Frames are captured at `resolution`, which is what inference runs on.
    total_cam = 0
    resolution = (800, 500)
    with PiCamera(resolution=resolution, sensor_mode=4) as camera:
        camera.start_preview()
        print('Camera starting......')
        sleep(2)

        inference = ImageInference(face_detection.model())
        IM_FOLDER = '/home/pi/iot/images'
        EMAIL = "*****@*****.**"
        i = 0
        try:
            run = get_order(client)
            while run:
                stream = BytesIO()
                detected = False
                camera.capture(stream, format='jpeg')
                print(i)
                i += 1
                stream.seek(0)
                image = Image.open(stream)
                faces = face_detection.get_faces(inference.run(image))
                if len(faces) > 0:
                    print("Found face")
                    draw = ImageDraw.Draw(image)
                    for face in faces:
                        x, y, width, height = face.bounding_box
                        area_ratio = (width * height) / (resolution[0] *
                                                         resolution[1])
                        if area_ratio < 0.06:
                            # Too small to count. The stream must stay open:
                            # another face in this frame may still qualify
                            # and the JPEG bytes are read back below.
                            continue
                        detected = True
                        draw.rectangle((x, y, x + width, y + height),
                                       outline='red')
                        print('Face : {}: ration : {:.2f}'.format(
                            face, area_ratio))

                if detected:
                    now = str(datetime.datetime.now())
                    imname = IM_FOLDER + '/face_%s.jpg' % (now)
                    image.save(imname, 'JPEG')
                    stream.seek(0)
                    with stream:
                        data = stream.read()
                    # send through tcp
                    send_data(client, data, type="image")
                    # argv list instead of a shell string: no quoting issues
                    # with the file name and no globbing of the recipient.
                    subprocess.call(
                        ['mpack', '-s', 'visitor at your door', imname, EMAIL])
                    run = get_order(client)
                    if not run:
                        #client.close()
                        continue
                    total_cam += 1
                    print('Face %d captured' % (total_cam))

                stream.close()
                sleep(0.1)
        finally:
            # Release the inference session and the preview even if the loop
            # raised.
            inference.close()
            camera.stop_preview()