def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_pupper_quant_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'pupper_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'

    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for _ in camera.capture_continuous(stream, format='rgb',
                                               use_video_port=True,
                                               resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                # (320, 304) accounts for automatic upscaling by picamera
                # when format='rgb'.
                image = Image.frombuffer('RGB', (320, 304), stream.getvalue())

                start_ms = time.time()
                results = engine.detect_with_image(image, threshold=0.2,
                                                   keep_aspect_ratio=True,
                                                   relative_coord=False,
                                                   top_k=10)
                elapsed_ms = time.time() - start_ms

                detectedObjs = []
                for obj in results:
                    if obj.label_id in range(3):
                        # box is [x_min, y_min, x_max, y_max].
                        box = obj.bounding_box.flatten().tolist()
                        w = box[2] - box[0]
                        h = box[3] - box[1]
                        objInfo = {'bbox_x': float(box[0]),
                                   'bbox_y': float(box[1]),
                                   'bbox_h': float(h),
                                   'bbox_w': float(w),
                                   'bbox_label': labels[obj.label_id],
                                   'bbox_confidence': float(obj.score)}
                        detectedObjs.append(objInfo)
                try:
                    cv_publisher.send(detectedObjs)
                except BaseException:
                    print('Failed to send bounding boxes. '
                          'CV UDP subscriber likely not initialized')
        except BaseException:
            with open(LOG_FILE, 'w') as f:
                f.write("Failed to run detection loop:\n {0}\n".format(
                    traceback.format_exc()))
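# The bounding boxes published above can be consumed with a matching
# subscriber. A minimal sketch, assuming the Publisher(105) call comes from
# the UDPComms library used elsewhere in the Pupper codebase (an
# assumption; the port and timeout are illustrative):
from UDPComms import Subscriber, timeout

cv_subscriber = Subscriber(105, timeout=0.5)
try:
    boxes = cv_subscriber.get()  # list of bbox dicts as built above
    for b in boxes:
        print(b['bbox_label'], b['bbox_confidence'])
except timeout:
    print('No detections received')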
def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    engine = DetectionEngine(detection_model)
    with open_image(image_name) as img:
        # Resized image.
        _, height, width, _ = engine.get_input_tensor_shape()
        tensor = np.asarray(img.resize((width, height), Image.NEAREST)).flatten()
    # Using `detect_with_input_tensor` to exclude image down-scale cost.
    for _ in range(num_inferences):
        engine.detect_with_input_tensor(tensor, top_k=1)
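# A minimal usage sketch for detection_job. The model and image paths are
# hypothetical; `open_image` is assumed to come from the same module as the
# function above. Running the job on its own thread is one way to drive this
# benchmark-style helper, since it blocks until num_inferences runs finish:
import threading

worker = threading.Thread(
    target=detection_job,
    args=('mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite',  # hypothetical
          'test_image.jpg',                                          # hypothetical
          100))
worker.start()
worker.join()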
def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)
    labels = load_labels(args.label)
    engine = DetectionEngine(args.model)

    camera = picamera.PiCamera()
    camera.resolution = (640, 480)
    camera.framerate = 30
    _, width, height, channels = engine.get_input_tensor_shape()

    overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    overlay = camera.add_overlay(overlay_img.tobytes(), size=overlay_img.size)
    overlay.layer = 3

    try:
        start_time = time.time()
        camera.start_preview(fullscreen=True)
        buff = io.BytesIO()
        for _ in camera.capture_continuous(buff, format='rgb',
                                           use_video_port=True,
                                           resize=(width, height)):
            buff.truncate()
            buff.seek(0)
            array = np.frombuffer(buff.getvalue(), dtype=np.uint8)

            # Do inference.
            start_ms = time.time()
            detected = engine.DetectWithInputTensor(array, top_k=10)
            elapsed_ms = time.time() - start_ms
            if detected:
                camera.annotate_text = ('%d objects detected.\n%.2fms'
                                        % (len(detected), elapsed_ms * 1000.0))
                overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
                draw = ImageDraw.Draw(overlay_img)
                for obj in detected:
                    # Relative coords to absolute coords.
                    box = obj.bounding_box * [[width, height]]
                    draw.rectangle(box.flatten().tolist(),
                                   COLORS[obj.label_id % len(COLORS)])
                overlay.update(overlay_img.tobytes())
            if time.time() - start_time >= args.time:
                break
    finally:
        camera.stop_preview()
        camera.close()
    return 0
def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'coco_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'

    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        stream = io.BytesIO()
        count = 0
        for _ in camera.capture_continuous(stream, format='rgb',
                                           use_video_port=True,
                                           resize=(width, height)):
            stream.truncate()
            stream.seek(0)
            input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
            # (320, 304) accounts for automatic upscaling by picamera
            # when format='rgb'.
            image = Image.frombuffer('RGB', (320, 304), stream.getvalue())
            draw = ImageDraw.Draw(image)

            start_ms = time.time()
            results = engine.detect_with_image(image, threshold=0.1,
                                               keep_aspect_ratio=True,
                                               relative_coord=False,
                                               top_k=51)
            elapsed_ms = time.time() - start_ms

            detectedObjs = []
            for obj in results:
                if obj.label_id == 0 or obj.label_id == 36:
                    if obj.label_id == 36:
                        print('Tennis ball detected')
                    # box is [x_min, y_min, x_max, y_max].
                    box = obj.bounding_box.flatten().tolist()
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1]),
                              labels[obj.label_id] + " " + str(obj.score))
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    objInfo = {'bbox_x': float(box[0]),
                               'bbox_y': float(box[1]),
                               'bbox_h': float(h),
                               'bbox_w': float(w),
                               'bbox_label': labels[obj.label_id],
                               'bbox_confidence': float(obj.score)}
                    detectedObjs.append(objInfo)
            cv_publisher.send(detectedObjs)

            with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/'
                      + str(count) + '.png', 'wb') as f:
                image.save(f)
            count += 1
class CoralObjectDetector:
    """Performs inference on an Edge TPU."""

    def __init__(self, model_path, device_path):
        self.__engine = DetectionEngine(
            model_path=os.path.join(model_path, 'edgetpu.tflite'),
            device_path=device_path)
        self.__model_shape = itemgetter(1, 2)(
            self.__engine.get_input_tensor_shape())

    @property
    def device_name(self):
        return "Coral"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect(self, image_shape, image_np, detections: List[Detection]):
        image_np = cv2.resize(image_np, dsize=self.__model_shape,
                              interpolation=cv2.INTER_LINEAR)
        objs = self.__engine.detect_with_input_tensor(
            input_tensor=image_np.flatten(), top_k=len(detections))
        d = 0
        max_width = image_shape[1] - 1
        max_height = image_shape[0] - 1
        while d < len(objs) and d < len(detections):
            detection = detections[d]
            obj = objs[d]
            detection.label = obj.label_id + 1
            detection.confidence = obj.score
            detection.bounding_box.y_min = int(obj.bounding_box[0][1] * max_height)
            detection.bounding_box.x_min = int(obj.bounding_box[0][0] * max_width)
            detection.bounding_box.y_max = int(obj.bounding_box[1][1] * max_height)
            detection.bounding_box.x_max = int(obj.bounding_box[1][0] * max_width)
            d += 1
        return self.__engine.get_inference_time()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label', help='File path of label file.',
                        required=True)
    args = parser.parse_args()

    with open(args.label, 'r', encoding="utf-8") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream, format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                # DetectWithImage expects a PIL image, so wrap the raw RGB
                # buffer rather than passing a flat numpy array.
                frame = Image.frombuffer('RGB', (width, height),
                                         stream.getvalue())
                start_ms = time.time()
                results = engine.DetectWithImage(frame, threshold=0.05,
                                                 keep_aspect_ratio=True,
                                                 relative_coord=False,
                                                 top_k=10)
                elapsed_ms = time.time() - start_ms
                if results:
                    logging.info("frame has {} objects".format(len(results)))
                    for detected_object in results:
                        logging.info("label: {}, score: {}, bounds: {}".format(
                            labels[detected_object.label_id],
                            detected_object.score,
                            detected_object.bounding_box.flatten().tolist()))
        finally:
            logging.info("done capturing")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--object_type',
                        help='Type of object to capture images of',
                        required=True)
    parser.add_argument('--n', help='Number of frames to acquire',
                        required=True)
    args = parser.parse_args()
    print('Setting up to capture ' + args.n + ' images of type '
          + args.object_type)

    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/training_image_acquistion_log.txt'
    SAVE_PATH = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/training_images/' + args.object_type + '/'
    if not os.path.isdir(SAVE_PATH):
        os.mkdir(SAVE_PATH)
        count = 0
    else:
        with open(SAVE_PATH + 'image_count.pkl', 'rb') as f:
            count = pickle.load(f)

    engine = DetectionEngine('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    input('Press ENTER to begin capturing frames...')

    n = 0
    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        try:
            for _ in camera.capture_continuous(
                    SAVE_PATH + args.object_type + '_{timestamp:%f}.png',
                    format='png', use_video_port=True,
                    resize=(width, height)):
                count += 1
                n += 1
                if n >= int(args.n):
                    break
                else:
                    print('Captured image #' + str(n))
        except BaseException as e:
            with open(LOG_FILE, 'w') as f:
                f.write("Failed to run image acquisition loop: {0}\n".format(str(e)))

    with open(SAVE_PATH + 'image_count.pkl', 'wb') as f:
        pickle.dump(count, f)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[1], input_shape[2])

    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        objs = engine.detect_with_input_tensor(input_tensor,
                                               threshold=args.threshold,
                                               top_k=args.top_k)
        end_time = time.monotonic()
        text_lines = [
            'Inference: %.2f ms' % ((end_time - start_time) * 1000),
            'FPS: %d fps' % (round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines)

    result = gstreamer.run_pipeline(user_callback, appsink_size=inference_size)
class ObjectDetector:
    def __init__(
            self,
            model_file="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite",
            label_file="coco_labels.txt"):
        self.model_file = model_file
        self.label_file = label_file
        self.labels = ReadLabelFile(self.label_file)
        self.engine = DetectionEngine(self.model_file)

    def detect(self, input_frame):
        if not self.labels or not self.engine:
            print("Detector is not initialized!")
            return []

        objects = self.engine.DetectWithInputTensor(input_frame.flatten(),
                                                    threshold=0.5, top_k=10)
        _, width, height, channels = self.engine.get_input_tensor_shape()
        detected_objects = []
        if objects:
            for obj in objects:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0] * width)
                box_top = int(box[1] * height)
                box_right = int(box[2] * width)
                box_bottom = int(box[3] * height)
                percentage = int(obj.score * 100)
                label = self.labels[obj.label_id]
                object_info = {
                    'box_left': box_left,
                    'box_right': box_right,
                    'box_top': box_top,
                    'box_bottom': box_bottom,
                    'percentage': percentage,
                    'label': label
                }
                detected_objects.append(object_info)
        return detected_objects
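# `ReadLabelFile` is referenced above (and in several other snippets) but not
# defined here. A minimal sketch of what it presumably does, mirroring the
# inline "<id> <name>" label parsing used elsewhere in this collection:
def ReadLabelFile(file_path):
    labels = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            labels[int(pair[0])] = pair[1]
    return labels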
def main(): parser = argparse.ArgumentParser() parser.add_argument("--model", help="File path of Tflite model.", required=True) parser.add_argument("--label", help="File path of label file.", required=True) parser.add_argument("--top_k", help="keep top k candidates.", default=3) parser.add_argument("--threshold", help="threshold to filter results.", type=float, default=0.5) parser.add_argument("--width", help="Resolution width.", default=640) parser.add_argument("--height", help="Resolution height.", default=480) args = parser.parse_args() # Initialize window. cv2.namedWindow( WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO) cv2.moveWindow(WINDOW_NAME, 100, 200) # Initialize engine. engine = DetectionEngine(args.model) labels = ReadLabelFile(args.label) if args.label else None # Generate random colors. last_key = sorted(labels.keys())[len(labels.keys()) - 1] colors = visual.random_colors(last_key) is_inpaint_mode = False resolution_width = args.width rezolution_height = args.height with picamera.PiCamera() as camera: camera.resolution = (resolution_width, rezolution_height) camera.framerate = 30 _, width, height, channels = engine.get_input_tensor_shape() rawCapture = PiRGBArray(camera) # allow the camera to warmup time.sleep(0.1) try: for frame in camera.capture_continuous(rawCapture, format="rgb", use_video_port=True): start_ms = time.time() rawCapture.truncate(0) image = frame.array im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) input_buf = PIL.Image.fromarray(image) # Run inference. ans = engine.DetectWithImage( input_buf, threshold=args.threshold, keep_aspect_ratio=False, relative_coord=False, top_k=args.top_k, ) # Display result. if is_inpaint_mode == True: mask = np.full((args.height, args.width), 0, dtype=np.uint8) if ans: for obj in ans: if labels and obj.label_id in labels: # Draw a mask rectangle. box = obj.bounding_box.flatten().tolist() visual.draw_rectangle(mask, box, (255, 255, 255), thickness=-1) # Image Inpainting dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA) # dst = cv2.inpaint(im, mask,3,cv2.INPAINT_NS) else: for obj in ans: if labels and obj.label_id in labels: label_name = labels[obj.label_id] caption = "{0}({1:.2f})".format( label_name, obj.score) # Draw a rectangle and caption. box = obj.bounding_box.flatten().tolist() visual.draw_rectangle(im, box, colors[obj.label_id]) visual.draw_caption(im, box, caption) dst = im # Calc fps. elapsed_ms = time.time() - start_ms fps = 1 / elapsed_ms # Display fps fps_text = "{0:.2f}ms, {1:.2f}fps".format( (elapsed_ms * 1000.0), fps) visual.draw_caption(dst, (10, 30), fps_text) # Display image cv2.imshow(WINDOW_NAME, dst) key = cv2.waitKey(10) & 0xFF if key == ord("q"): break elif key == ord(" "): is_inpaint_mode = not is_inpaint_mode print("inpant mode change :", is_inpaint_mode) finally: camera.stop_preview() # When everything done, release the window cv2.destroyAllWindows()
def mqtt_device_demo(args):
    """Connects a device, sends data, and receives data."""
    # [START iot_mqtt_run]
    global minimum_backoff_time
    global MAXIMUM_BACKOFF_TIME

    # Publish to the events or state topic based on the flag.
    sub_topic = 'events' if args.message_type == 'event' else 'state'
    mqtt_topic = '/devices/{}/{}'.format(args.device_id, sub_topic)

    jwt_iat = datetime.datetime.utcnow()
    jwt_exp_mins = args.jwt_expires_minutes
    client = get_client(
        args.project_id, args.cloud_region, args.registry_id, args.device_id,
        args.private_key_file, args.algorithm, args.ca_certs,
        args.mqtt_bridge_hostname, args.mqtt_bridge_port)

    # Initialize engine.
    engine = DetectionEngine('./edgetpu/test_data/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    # labels = ReadLabelFile(args_tpu.label) if args_tpu.label else None

    # Initialize the camera.
    with picamera.PiCamera() as camera:
        camera.resolution = (1024, 768)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream, format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                client.loop()
                if should_backoff:
                    # If backoff time is too large, give up.
                    if minimum_backoff_time > MAXIMUM_BACKOFF_TIME:
                        print('Exceeded maximum backoff time. Giving up.')
                        break
                    delay = minimum_backoff_time + random.randint(0, 1000) / 1000.0
                    time.sleep(delay)
                    minimum_backoff_time *= 2
                    client.connect(args.mqtt_bridge_hostname,
                                   args.mqtt_bridge_port)

                now = datetime.datetime.now()
                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                start_ms = time.time()
                ans = engine.DetectWithInputTensor(input, threshold=0.5,
                                                   top_k=10)
                elapsed_ms = time.time() - start_ms

                # Display result.
                print('-----------------------------------------')
                nPerson = 0
                bbox = list()
                scores = list()
                if ans:
                    for obj in ans:
                        nPerson = nPerson + 1
                        score = [obj.score]
                        box = obj.bounding_box.flatten().tolist()
                        bbox.append(box)
                        scores.append(score)
                msg = {"nPersons": int(nPerson),
                       "bounding_box": str(bbox),
                       "scores": str(scores)}

                bounding_box = [{'box_0': bb[0], 'box_1': bb[1],
                                 'box_2': bb[2], 'box_3': bb[3]}
                                for bb in eval(msg["bounding_box"])]
                scores_msg = [{'score': s[0]} for s in eval(msg["scores"])]
                info = {'nPersons': '{}'.format(nPerson),
                        'bounding_box': bounding_box,
                        'scores': scores_msg,
                        'TimeStamp': str(int(time.time()))}

                ###################################
                try:
                    list_short, info_short = change_info_list(
                        window=30, list=list_short, nPerson=nPerson,
                        length='short')
                except:
                    list_short = []
                    list_short, info_short = change_info_list(
                        window=30, list=list_short, nPerson=nPerson,
                        length='short')
                try:
                    list_long, info_long = change_info_list(
                        window=300, list=list_long, nPerson=nPerson,
                        length='long')
                except:
                    list_long = []
                    list_long, info_long = change_info_list(
                        window=300, list=list_long, nPerson=nPerson,
                        length='long')
                info.update(info_short)
                info.update(info_long)
                ###################################

                payload = json.dumps(info)
                print('Publishing message {}'.format(payload))

                # [START iot_mqtt_jwt_refresh]
                seconds_since_issue = (datetime.datetime.utcnow() - jwt_iat).seconds
                if seconds_since_issue > 60 * jwt_exp_mins:
                    jwt_iat = datetime.datetime.utcnow()
                    client = get_client(
                        args.project_id, args.cloud_region, args.registry_id,
                        args.device_id, args.private_key_file, args.algorithm,
                        args.ca_certs, args.mqtt_bridge_hostname,
                        args.mqtt_bridge_port)
                # [END iot_mqtt_jwt_refresh]

                # Publish "payload" to the MQTT topic. qos=1 means at least
                # once delivery. Cloud IoT Core also supports qos=0 for at
                # most once delivery.
                client.publish(mqtt_topic, payload, qos=1)

                # Send events every second. State should not be updated as
                # often.
                time.sleep(1 if args.message_type == 'event' else 5)
        finally:
            camera.stop_preview()
def inference_thread(running, state, result_buffer, frame_buffer, args,
                     identity_dict, current_identity):
    global IDLE, TRACK, RESET, FACE_RECOG_THRESHOLD, FACE_RECOG_THRESHOLD_A
    global od_engine, face_detector, facenet_engine, svm_clf

    # Initialize object detection engine.
    od_engine = DetectionEngine(args.od_model)
    print("device_path: ", od_engine.device_path())
    _, od_width, od_height, _ = od_engine.get_input_tensor_shape()
    print("od input dim: ", od_width, od_height)

    # Initialize the face detector using the OpenCV Haar cascade model.
    face_detector = FaceDetector(args.hc_model)

    # Initialize facenet engine.
    facenet_engine = BasicEngine(args.fn_model)

    # Load the sklearn support vector machine model from disk.
    svm_clf = pickle.load(open(args.svm_model, 'rb'))

    while running.value:
        # Check if the frame buffer has a frame, else busy wait.
        if frame_buffer.empty():
            continue
        frame = frame_buffer.get()
        tinf = time.perf_counter()
        if state.value == IDLE:
            fd_results = None
            # Reorder image frame from BGR to RGB.
            img = frame[:, :, ::-1]
            # Face detection (with image preprocessing / downsampling).
            faces_coord = face_detector.detect(img, True)
            print("faces_coord: ", faces_coord)
            if not isinstance(faces_coord, type(None)):
                # Normalize the face image.
                face_image = np.array(normalize_faces(img, faces_coord))
                # FaceNet generates the face embedding.
                facenet_engine.RunInference(face_image.flatten())
                face_emb = facenet_engine.get_raw_output().reshape(1, -1)
                # Use the SVM to classify identity from the face embedding.
                pred_prob = svm_clf.predict_proba(face_emb)
                best_class_index = np.argmax(pred_prob, axis=1)[0]
                best_class_prob = pred_prob[0, best_class_index]
                print("best_class_index: ", best_class_index)
                print("best_class_prob: ", best_class_prob)
                print("label", svm_clf.classes_[best_class_index])
                # Check threshold and verify the identity is in the identity
                # dictionary.
                if best_class_prob > FACE_RECOG_THRESHOLD:
                    face_label = svm_clf.classes_[best_class_index]
                    if face_label in identity_dict:
                        print("\n=================================")
                        print("Identity found: ", face_label, " ",
                              identity_dict[face_label],
                              " with Prob = ", best_class_prob)
                        print("=================================\n")
                        current_identity.value = identity_dict[face_label][0]  # ID
                result_buffer.put(faces_coord)
        elif state.value == TRACK:
            od_results = None
            # Convert the numpy array to a PIL image in RGB format.
            img = Image.fromarray(frame[:, :, ::-1], 'RGB')
            # Run inference.
            od_results = od_engine.DetectWithImage(img, threshold=0.30,
                                                   keep_aspect_ratio=True,
                                                   relative_coord=False,
                                                   top_k=10)
            # Push result to the buffer queue.
            result_buffer.put(od_results)
        print(time.perf_counter() - tinf, "sec")
    print("[Finish] inference_thread")
def main():
    mot = MotorController()

    model_filename = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
    label_filename = "coco_labels.txt"
    engine = DetectionEngine(model_filename)
    labels = _read_label_file(label_filename)

    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    fnt = ImageFont.load_default()

    # To view the preview on VNC:
    # https://raspberrypi.stackexchange.com/a/74390
    with picamera.PiCamera() as camera:
        _monkey_patch_picamera()
        camera.resolution = (CAMERA_WIDTH, CAMERA_HEIGHT)
        camera.framerate = 15
        camera.rotation = 180
        _, width, height, channels = engine.get_input_tensor_shape()
        print("{}, {}".format(width, height))

        overlay_renderer = None
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream, format='rgb',
                                                 use_video_port=True):
                # Make an Image object from the camera stream.
                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                input = input.reshape((CAMERA_HEIGHT, CAMERA_WIDTH, 3))
                image = Image.fromarray(input)
                # image.save("out.jpg")

                # Make the overlay image plane.
                img = Image.new('RGBA', (CAMERA_WIDTH, CAMERA_HEIGHT),
                                (255, 0, 0, 0))
                draw = ImageDraw.Draw(img)
                draw.line((CAMERA_WIDTH // 2, 0,
                           CAMERA_WIDTH // 2, CAMERA_HEIGHT), width=1)
                draw.line((CAMERA_WIDTH // 4, 0,
                           CAMERA_WIDTH // 4, CAMERA_HEIGHT), width=1)
                draw.line((3 * CAMERA_WIDTH // 4, 0,
                           3 * CAMERA_WIDTH // 4, CAMERA_HEIGHT), width=1)

                # Run detection.
                start_ms = time.time()
                results = engine.DetectWithImage(image, threshold=0.2,
                                                 top_k=5)
                elapsed_ms = (time.time() - start_ms) * 1000.0

                obj = None
                if results:
                    obj = next((x for x in results
                                if labels[x.label_id] == "banana"), None)

                if obj:
                    # Scale the relative box coords to camera pixels.
                    box = obj.bounding_box.flatten().tolist()
                    box[0] *= CAMERA_WIDTH
                    box[1] *= CAMERA_HEIGHT
                    box[2] *= CAMERA_WIDTH
                    box[3] *= CAMERA_HEIGHT
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1] - 10), labels[obj.label_id],
                              font=fnt, fill="red")

                    obj_width = box[2] - box[0]
                    obj_center = box[0] + obj_width // 2
                    draw.point((obj_center, box[1] + (box[3] - box[1]) // 2))
                    print(obj_center - CAMERA_WIDTH // 2)
                    if (obj_center - CAMERA_WIDTH // 2) > CAMERA_WIDTH // 4:
                        print("TURN R")
                        mot.turn_r(radius=30)
                    elif (obj_center - CAMERA_WIDTH // 2) < -CAMERA_WIDTH // 4:
                        print("TURN L")
                        mot.turn_l(radius=30)
                    elif obj_width < CAMERA_WIDTH / 4:
                        print("FORWARD")
                        mot.forward()
                    else:
                        mot.stop()
                    camera.annotate_text = "{0:.2f}ms".format(elapsed_ms)
                else:
                    mot.stop()

                if not overlay_renderer:
                    overlay_renderer = camera.add_overlay(
                        img.tobytes(),
                        size=(CAMERA_WIDTH, CAMERA_HEIGHT),
                        layer=4, alpha=255)
                else:
                    overlay_renderer.update(img.tobytes())
        finally:
            mot.stop()
            if overlay_renderer:
                camera.remove_overlay(overlay_renderer)
            camera.stop_preview()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label', help='File path of label file.',
                        required=True)
    parser.add_argument('--synchronous',
                        help='Use to do analysis synchronously.',
                        required=False, action="store_true", default=False)
    parser.add_argument('--local',
                        help='send output to local window, instead of twitch',
                        required=False, action="store_true", default=False)
    args = parser.parse_args()

    with open(args.label, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    try:
        cap = cv2.VideoCapture(2)
        _, width, height, channels = engine.get_input_tensor_shape()
        font = cv2.FONT_HERSHEY_SIMPLEX
        # Start idle so the first frame kicks off analysis.
        is_processing = False
        child = None
        child_result = None
        next_child_result = None
        while True:
            ret, frame = cap.read()
            # The resized version of the frame will be used for analysis.
            resized = cv2.resize(frame, (width, height))

            if args.synchronous:
                child_result = []
                analyze_frame(resized, child_result, engine, labels,
                              args.local)
            else:
                if not is_processing:
                    # Kick off analysis in a subprocess, if not currently
                    # analyzing.
                    is_processing = True
                    next_child_result = []
                    child = multiprocessing.Process(
                        target=analyze_frame,
                        args=(resized, next_child_result, engine, labels,
                              args.local))
                    child.start()
                elif child is not None and not child.is_alive():
                    child.join()
                    child_result = next_child_result
                    is_processing = False

            frame = draw_boxes_on_frame(child_result, frame)

            if args.local:
                # Weird black magic needed to draw to the screen.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                cv2.imshow('frame', frame)
            else:
                sys.stdout.buffer.write(frame.tobytes())
    except KeyboardInterrupt:
        print('Shutting down...')
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='If to draw the results.',
                        default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False
    frames = 0
    start_seconds = time.time()

    print('opening socket.')
    receiveSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((TCP_IP, TCP_PORT))
    s.listen(1)
    receiveSocket.bind((UDP_IP, UDP_RECEIVE_PORT))
    print('listening...')

    _, width, height, channels = engine.get_input_tensor_shape()
    imageSize = width * height * 3

    print('waiting for client')
    conn, addr = s.accept()
    print('Connection address:', addr)

    # Open image.
    while 1:
        print('waiting for packet')
        data, addr = receiveSocket.recvfrom(66507)
        if len(data) > 0:
            start_s = time.time()
            try:
                image = Image.open(io.BytesIO(data)).convert('RGB')
            except OSError:
                print('Could not read image')
                continue
            input = np.frombuffer(image.tobytes(), dtype=np.uint8)
            results = engine.DetectWithInputTensor(input, threshold=0.25,
                                                   top_k=10)
            print('time to process image', (time.time() - start_s) * 1000)
            output = to_output(results, image.size, labels)
            message = json.dumps({'results': output}) + '|'
            try:
                conn.send(message.encode('utf-8'))
            except ConnectionResetError:
                print('Socket disconnected...waiting for client')
                conn, addr = s.accept()
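# `to_output` is not shown in this snippet. A plausible minimal version (an
# assumption, not the original helper): scale each detection's relative box
# to the image size and pair it with its label and score so the result
# serializes cleanly to JSON.
def to_output(results, size, labels):
    width, height = size
    output = []
    for obj in results:
        # bounding_box is a 2x2 array of relative coords; broadcast to pixels.
        box = (obj.bounding_box * [[width, height]]).flatten().tolist()
        output.append({
            'label': labels[obj.label_id] if labels else str(obj.label_id),
            'score': float(obj.score),
            'box': box,
        })
    return output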
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='If to draw the results.',
                        default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False
    frames = 0
    start_seconds = time.time()

    FULL_SIZE_W = 640
    FULL_SIZE_H = 480
    img = Image.new('RGBA', (FULL_SIZE_W, FULL_SIZE_H))
    draw = ImageDraw.Draw(img)

    # Open image.
    with picamera.PiCamera() as camera:
        camera.resolution = (FULL_SIZE_W, FULL_SIZE_H)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        print('input dims', width, height)
        camera.start_preview(fullscreen=False,
                             window=(700, 200, FULL_SIZE_W, FULL_SIZE_H))

        # The Raspberry Pi requires resized images to be multiples of 32x16.
        camera_multiple = (16, 32)
        valid_resize_w = width - width % camera_multiple[1]
        valid_resize_h = height - height % camera_multiple[0]
        padding_w = (width - valid_resize_w) // 2
        padding_h = (height - valid_resize_h) // 2
        scale_w = FULL_SIZE_W / width
        scale_h = FULL_SIZE_H / height

        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(
                    stream,
                    format='rgb',
                    use_video_port=True,
                    resize=(valid_resize_w, valid_resize_h)):
                stream.truncate()
                stream.seek(0)
                start_frame = time.time()
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                if padding_w > 0 or padding_h > 0:
                    flattened = pad_and_flatten(
                        input, (valid_resize_h, valid_resize_w),
                        padding_h, padding_w)
                else:
                    flattened = input  # flatten padded element
                reshape_time = time.time() - start_frame

                start_s = time.time()
                # Run inference.
                results = engine.DetectWithInputTensor(flattened,
                                                       threshold=0.2,
                                                       top_k=10)
                elapsed_s = time.time() - start_frame

                if padding_w > 0 or padding_h > 0:
                    boxes = translate_and_scale_boxes(
                        results,
                        (valid_resize_w, valid_resize_h),
                        (padding_w, padding_h),
                        (FULL_SIZE_W, FULL_SIZE_H))
                else:
                    boxes = scale_boxes(results, (FULL_SIZE_W, FULL_SIZE_H))

                if args.draw:
                    img.putalpha(0)
                    draw_boxes(draw, boxes)
                    if labels:
                        draw_labels(draw, results, boxes, labels)
                    imbytes = img.tobytes()
                    if renderer is None:
                        renderer = camera.add_overlay(
                            imbytes, size=img.size, layer=4, format='rgba',
                            fullscreen=False,
                            window=(700, 200, 640, FULL_SIZE_H))
                    else:
                        renderer.update(imbytes)

                frame_seconds = time.time()
                fps = frames * 1.0 / (frame_seconds - start_seconds)
                frames = frames + 1
                camera.annotate_text = "%.2fms, %d fps" % (elapsed_s * 1000.0,
                                                           fps)
        finally:
            camera.stop_preview()
class App:
    def __init__(self):
        self.frame = None
        self.thread = None
        self.stopEvent = None

        self.camera = cv2.VideoCapture(0)
        self.camera.set(3, WIDTH)
        self.camera.set(4, HEIGHT)

        self.engine = DetectionEngine(
            './mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite')
        self.labels = ReadLabelFile('./coco_labels.txt')

        self.root = tki.Tk()
        self.root.bind('<Escape>', lambda e: self.onClose())
        self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)
        self.panel = None

        self.stopEvent = threading.Event()
        self.thread = threading.Thread(target=self.videoLoop, args=())
        self.thread.daemon = True
        self.thread.start()

    def findObjects(self, image):
        _, width, height, channels = self.engine.get_input_tensor_shape()
        input = cv2.resize(image, (width, height))
        input = input.reshape((width * height * channels))
        results = self.engine.DetectWithInputTensor(input, top_k=5)
        return results

    def videoLoop(self):
        try:
            while not self.stopEvent.is_set():
                if not self.camera.isOpened():
                    continue
                ret, self.frame = self.camera.read()
                if not ret:
                    continue
                font = cv2.FONT_HERSHEY_SIMPLEX
                self.frame = cv2.cvtColor(self.frame, cv2.COLOR_BGR2RGB)
                results = self.findObjects(self.frame)
                if results:
                    for obj in results:
                        if obj.score > 0.5:
                            top_left = calculatePosition(obj.bounding_box[0])
                            bottom_right = calculatePosition(obj.bounding_box[1])
                            center_point = (
                                int(top_left[0] + ((bottom_right[0] - top_left[0]) / 2)),
                                int(top_left[1] + ((bottom_right[1] - top_left[1]) / 2)))
                            # cv2.rectangle(self.frame, top_left, bottom_right,
                            #               (0, 255, 0), 1)
                            label = self.labels[obj.label_id]
                            label_size = cv2.getTextSize(label, font, 0.5,
                                                         cv2.LINE_AA)
                            label_width = label_size[0][0]
                            label_height = label_size[0][1]

                            # Pointer.
                            cv2.circle(self.frame, center_point, 5,
                                       (0, 255, 0), -1)
                            cv2.line(self.frame,
                                     (int(top_left[0] + label_width / 2),
                                      top_left[1]),
                                     center_point, (0, 255, 0), 2)

                            # Label.
                            label_x = top_left[0] - 1
                            label_y = top_left[1] - label_height
                            if label_y < 0:
                                label_y = 0
                            cv2.rectangle(
                                self.frame, (label_x, label_y),
                                (label_x + label_width,
                                 label_y + label_height),
                                (0, 255, 0), -1)
                            cv2.putText(self.frame, label,
                                        (label_x + 5,
                                         label_y + label_height - 5),
                                        font, 0.5, (255, 255, 255))

                image = Image.fromarray(self.frame)
                image = ImageTk.PhotoImage(image)
                if self.panel is None:
                    self.panel = tki.Label(image=image)
                    self.panel.image = image
                    self.panel.pack(side="left", padx=0, pady=0)
                else:
                    self.panel.configure(image=image)
                    self.panel.image = image

            print("[INFO] closing...")
            self.camera.release()
            self.root.destroy()
            return -1
        except Exception as e:
            print("[INFO] caught a RuntimeError")
            print(e)
        finally:
            print("[INFO] closing...")
            self.camera.release()
            self.root.destroy()
            return -1

    def onClose(self):
        self.stopEvent.set()
class VideoCamera(object):
    def __init__(self):
        print('starting camera')
        with open(Config.LABEL_PATH, 'r', encoding="utf-8") as f:
            pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
            self.labels = dict((int(k), v) for k, v in pairs)
        self.engine = DetectionEngine(Config.MODEL_PATH)
        # Using OpenCV to capture from device 0. If you have trouble
        # capturing from a webcam, comment the line below out and use a
        # video file instead.
        self.video = cv2.VideoCapture(0)
        if self.video:
            self.video.set(3, 640)
            self.video.set(4, 480)
        # If you decide to use video.mp4, you must have this file in the
        # same folder as main.py.
        # self.video = cv2.VideoCapture('video.mp4')

    def __del__(self):
        print('closing camera')
        self.video.release()

    def get_frame(self):
        start_time = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        topLeftCornerOfText = (10, 20)
        bottomLeftCornerOfText = (10, 470)
        fontScale = 0.6
        fontColor = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
        lineType = 1
        annotate_text = ""

        _, width, height, channels = self.engine.get_input_tensor_shape()
        if not self.video.isOpened():
            print('Camera is not opened')
        ret, img = self.video.read()
        if not ret:
            print('Camera is not read')
        input = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input = cv2.resize(input, (width, height))
        input = input.reshape((width * height * channels))
        rows = img.shape[0]
        cols = img.shape[1]

        record_time = time.time()
        elapsed_record_ms = record_time - start_time

        # Run inference.
        ans = self.engine.DetectWithInputTensor(
            input, threshold=Config.DETECT_THRESHOLD, top_k=Config.TOP_K)
        detection_time = time.time()
        elapsed_detection_ms = detection_time - record_time

        # Display result.
        if ans:
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                # Draw a rectangle.
                x = box[0] * cols
                y = box[1] * rows
                right = box[2] * cols
                bottom = box[3] * rows
                if obj.score > Config.DETECT_THRESHOLD:
                    cv2.rectangle(img, (int(x), int(y)),
                                  (int(right), int(bottom)),
                                  fontColor, thickness=1)
                    annotate_text = "%s %.2f" % (self.labels[obj.label_id],
                                                 obj.score)
                    annotate_text_time = time.time()
                    cv2.putText(img, annotate_text, (int(x), int(bottom)),
                                font, fontScale, fontColor, lineType)

        elapsed_frame_ms = (time.time() - start_time) * 1000.0
        frame_rate_text = "FPS: %.2f record: %.2fms detection: %.2fms" % (
            1000.0 / elapsed_frame_ms,
            elapsed_record_ms * 1000.0,
            elapsed_detection_ms * 1000.0)
        cv2.putText(img, frame_rate_text, topLeftCornerOfText, font,
                    fontScale, fontColor, lineType)

        ret, jpeg = cv2.imencode('.jpg', img)
        return jpeg.tobytes()
class ObjectDetector(object):
    def __init__(self, model_path, label_path, use_coral_flag, use_tpu_flag,
                 res_x, res_y, min_conf_threshold):
        self.res_y = res_y
        self.res_x = res_x
        self.use_coral_flag = use_coral_flag
        if use_coral_flag:
            from edgetpu.detection.engine import DetectionEngine
            from edgetpu.utils import dataset_utils
        self.min_conf_threshold = min_conf_threshold

        # Load the label map.
        with open(label_path, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del (self.labels[0])

        if use_tpu_flag:
            self.interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()

        # Get model details.
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.is_floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # Coral.
        if use_coral_flag:
            self.engine = DetectionEngine(model_path)
            self.labels = dataset_utils.read_label_file(label_path)
            _, height, width, _ = self.engine.get_input_tensor_shape()

    def apply_coral_model(self, input_data):
        # The Edge TPU API expects a flat 1-D input tensor.
        ans = self.engine.detect_with_input_tensor(input_data.flatten(),
                                                   threshold=0.05, top_k=10)
        for obj in ans:
            if self.labels:
                print(self.labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)

    def apply_tflite_model(self, input_data):
        # Perform the actual detection by running the model with the image
        # as input.
        self.interpreter.set_tensor(self.input_details[0]['index'],
                                    input_data)
        self.interpreter.invoke()

        # Retrieve detection results.
        # Bounding box coordinates of detected objects:
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        # Class index of detected objects:
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]
        # Confidence of detected objects:
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]
        return (boxes, classes, scores)

    def process_frame(self, frame):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if the
        # model is non-quantized).
        if self.is_floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        if self.use_coral_flag:
            # The Coral path only prints its detections; return empty
            # results so the caller gets a consistent tuple.
            self.apply_coral_model(input_data)
            boxes, classes, scores = [], [], []
        else:
            (boxes, classes, scores) = self.apply_tflite_model(input_data)
        return (frame, boxes, classes, scores)

    def is_interesting_object(self, scores, classes):
        is_interesting_object = False
        interesting_classes = []
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                is_interesting_object = True
                interesting_classes.append(self.labels[int(classes[i])])
        return is_interesting_object, interesting_classes

    def draw_frame(self, frame, boxes, classes, scores):
        # Loop over all detections and draw a detection box if confidence
        # is above the minimum threshold.
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates and draw the box. The
                # interpreter can return coordinates outside of image
                # dimensions, so force them within the image using max()
                # and min().
                ymin = int(max(1, (boxes[i][0] * self.res_y)))
                xmin = int(max(1, (boxes[i][1] * self.res_x)))
                ymax = int(min(self.res_y, (boxes[i][2] * self.res_y)))
                xmax = int(min(self.res_x, (boxes[i][3] * self.res_x)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                              (10, 255, 0), 4)

                # Draw the label. Look up the object name from the "labels"
                # array using the class index.
                object_name = self.labels[int(classes[i])]
                label = '%s: %d%%' % (object_name,
                                      int(scores[i] * 100))  # e.g. 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                # Make sure not to draw the label too close to the top of
                # the window.
                label_ymin = max(ymin, labelSize[1] + 10)
                # Draw a white box to put the label text in.
                cv2.rectangle(frame,
                              (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)
                # Draw the label text.
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        (flag, encodedImage) = cv2.imencode(".jpg", frame)
        return encodedImage
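# A minimal usage sketch for the ObjectDetector class above (the file names
# are hypothetical; CPU TFLite path, no Coral or TPU delegate):
detector = ObjectDetector(
    model_path='detect.tflite', label_path='labelmap.txt',
    use_coral_flag=False, use_tpu_flag=False,
    res_x=640, res_y=480, min_conf_threshold=0.5)
frame = cv2.imread('test.jpg')  # hypothetical input image
frame, boxes, classes, scores = detector.process_frame(frame)
encoded_jpeg = detector.draw_frame(frame, boxes, classes, scores)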
# (Fragment) The opening condition was elided; it is inferred from the else
# branch that a device path selects a specific Edge TPU.
if args.device_path:
    engine = DetectionEngine(args.model_file, device_path=args.device_path)
else:
    engine = DetectionEngine(args.model_file)
print("device path:", engine.device_path())

output_sizes = engine.get_all_output_tensors_sizes()
# print("output sizes:", output_sizes)
count = 0
indices = []
for i in output_sizes:
    count = count + i
    indices.append(count)
# print("indices:", indices)

input_tensor_shape = engine.get_input_tensor_shape()
if (input_tensor_shape.size != 4 or input_tensor_shape[3] != 3
        or input_tensor_shape[0] != 1):
    raise RuntimeError(
        'Invalid input tensor shape! Expected: [1, height, width, 3]')
_, height, width, _ = input_tensor_shape
print("height, width:", height, width)

img = Image.open(args.image)
img = img.resize((width, height))
input_tensor = np.asarray(img).flatten()

loop_counts = int(args.loop_counts)
if loop_counts > 1:
    for a in range(5):
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label', help='File path of label file.',
                        required=True)
    parser.add_argument('--top_k', help='keep top k candidates.', default=3)
    parser.add_argument('--threshold', help='threshold to filter results.',
                        default=0.5, type=float)
    parser.add_argument('--width', help='Resolution width.', default=640,
                        type=int)
    parser.add_argument('--height', help='Resolution height.', default=480,
                        type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME,
                    cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE |
                    cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format='rgb',
                                                   use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.DetectWithImage(input_buf,
                                             threshold=args.threshold,
                                             keep_aspect_ratio=False,
                                             relative_coord=False,
                                             top_k=args.top_k)
                elapsed_ms = time.time() - start_ms

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed_ms
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed_ms * 1000.0), avg_fps)

                # Display fps.
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format(
                    (elapsed_ms * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = "./all_models"
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont("Arial", 20)

    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    _, w, h, _ = engine.get_input_tensor_shape()
    camera = pygame.camera.Camera(camlist[0], (cam_w, cam_h))
    display = pygame.display.set_mode((cam_w, cam_h), 0)

    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            results = engine.DetectWithInputTensor(input,
                                                   threshold=args.threshold,
                                                   top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = "Inference: %5.2fms FPS: %3.1f" % (inference_ms,
                                                               fps_ms)
            for result in results:
                x0, y0, x1, y1 = result.bounding_box.flatten().tolist()
                rect = pygame.Rect(x0 * cam_w, y0 * cam_h,
                                   (x1 - x0) * cam_w, (y1 - y0) * cam_h)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = "%.0f%% %s" % (100 * result.score,
                                       labels[result.label_id])
                text = font.render(label, True, red)
                mysurface.blit(text, (x0 * cam_w, y0 * cam_h))
            text = font.render(annotate_text, True, red)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
# (Fragment) The opening of the parser.add_argument call was elided; it is
# reconstructed from the remaining arguments.
parser.add_argument('--output', help='File path of the output image.')
args = parser.parse_args()

if not args.output:
    output_name = 'object_detection_result.jpg'
else:
    output_name = args.output

# Initialize engine.
engine = DetectionEngine(args.model)
labels = ReadLabelFile(args.label) if args.label else None

with picamera.PiCamera() as camera:
    camera.resolution = (1028, 712)
    camera.framerate = 30
    _, width, height, channels = engine.get_input_tensor_shape()
    camera.start_preview()
    try:
        stream = io.BytesIO()
        for foo in camera.capture_continuous(stream, format='rgb',
                                             use_video_port=True,
                                             resize=(width, height)):
            stream.truncate()
            stream.seek(0)
            input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
            start_ms = time.time()
            ans = engine.DetectWithInputTensor(input, threshold=0.5,
                                               top_k=10)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label', help='File path of label file.')
    args = parser.parse_args()

    labels = dataset_utils.read_label_file(args.label) if args.label else None
    engine = DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        preview_size = (640, 480)
        camera.resolution = preview_size
        camera.framerate = 30
        # camera.hflip = True
        # camera.vflip = True
        # camera.rotation = 90
        _, input_height, input_width, _ = engine.get_input_tensor_shape()
        input_size = (input_width, input_height)

        # Width is rounded up to the nearest multiple of 32,
        # height to the nearest multiple of 16.
        capture_size = (math.ceil(input_width / 32) * 32,
                        math.ceil(input_height / 16) * 16)

        # Actual detection area on preview.
        detect_size = (preview_size[0] * input_size[0] / capture_size[0],
                       preview_size[1] * input_size[1] / capture_size[1])

        # Make annotator smaller for efficiency.
        annotator_factor = 0.5
        annotator_size = (int(preview_size[0] * annotator_factor),
                          int(preview_size[1] * annotator_factor))

        # Font for drawing detection candidates.
        font = ImageFont.truetype(
            '/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf', size=12)

        camera.start_preview()
        annotator = Annotator(camera, dimensions=annotator_size,
                              default_color=(255, 255, 255, 64))

        def annotate(candidates):
            annotator.clear()

            # Get actual coordinates to draw.
            def translate(relative_coord):
                return (detect_size[0] * relative_coord[0] * annotator_factor,
                        detect_size[1] * relative_coord[1] * annotator_factor)

            for c in candidates:
                top_left = translate(c.bounding_box[0])
                bottom_right = translate(c.bounding_box[1])
                annotator.bounding_box(top_left + bottom_right)

                text = '{} {:.2f}'.format(labels[c.label_id], c.score) \
                    if labels else '{:.2f}'.format(c.score)
                annotator.text(top_left, text, font=font)

            annotator.update()

        try:
            stream = io.BytesIO()
            for _ in camera.capture_continuous(
                    stream, format='rgb', use_video_port=True,
                    resize=capture_size):
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(),
                                             dtype=np.uint8)
                if input_size != capture_size:
                    # Crop to input size. Note dimension order
                    # (height, width, channels).
                    input_tensor = input_tensor.reshape(
                        (capture_size[1], capture_size[0], 3))[
                            0:input_height, 0:input_width, :].ravel()

                start_ms = time.time()
                results = engine.detect_with_input_tensor(input_tensor,
                                                          top_k=3)
                elapsed_ms = time.time() - start_ms

                annotate(results)
                camera.annotate_text = '{:.2f}ms'.format(elapsed_ms * 1000.0)
        finally:
            # Maybe should make this an annotator method.
            camera.remove_overlay(annotator._overlay)
            camera.stop_preview()