def main():
    """Run live object detection or image classification on a webcam feed.

    Frames are grabbed with OpenCV, converted to PIL RGB for the Edge TPU
    engine, annotated in place, and shown in a cv2 window until 'q' is
    pressed.

    Command-line flags:
      --model   Path of the detection model (required).
      --label   Path of the labels file (optional).
      --mode    OBJECT_DETECTION or IMAGE_CLASSIFICATION (required).
      --camera  Camera source index (optional, defaults to 0).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)
    args = parser.parse_args()

    # Initialize the inference engine matching the requested mode.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print(
            "Please insert the mode from OBJECT_DETECTION or IMAGE_CLASSIFICATION"
        )
        return  # nothing to clean up yet; a plain return ends the program

    labels = read_label_file(args.label) if args.label else None
    label = None
    # Compare against None: `if args.camera` would wrongly discard an
    # explicit camera index of 0.
    camera = args.camera if args.camera is not None else 0

    # Initialize the camera.
    cam = cv2.VideoCapture(camera)

    # Rolling window of frame timestamps used to estimate FPS.
    frame_times = deque(maxlen=40)
    try:
        while True:
            ret, cv2_im = cam.read()
            if not ret:
                # Camera did not deliver a frame (unplugged / bad index).
                print('Failed to read frame from camera.')
                break

            # OpenCV delivers BGR numpy arrays; the TPU engine expects a
            # PIL image in RGB order.
            pil_im = Image.fromarray(np.uint8(cv2_im)).convert('RGB')

            if args.mode == "OBJECT_DETECTION":
                ans = engine.DetectWithImage(pil_im,
                                             threshold=0.05,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=10)
                if ans:
                    print("{} object(s) detected".format(len(ans)))
                    for obj in ans:
                        if obj.score > 0.4:
                            if labels:
                                label = labels[obj.label_id]
                                if SHOW_CONFIDENCE_IN_LABEL:
                                    label = label + "({0:.2f})".format(obj.score)
                            draw_rectangles(obj.bounding_box, cv2_im, label=label)
                else:
                    draw_text(cv2_im, 'No object detected!')
            else:
                i = 0
                for result in engine.ClassifyWithImage(pil_im, top_k=5):
                    if result:
                        # --label is optional: fall back to the raw class id
                        # instead of crashing on labels=None.
                        label = labels[result[0]] if labels else str(result[0])
                        score = result[1]
                        draw_text(cv2_im, label, i)
                        i += 1
                    else:
                        draw_text(cv2_im, 'No classification detected!')

            lastInferenceTime = engine.get_inference_time()
            frame_times.append(time.time())
            # +0.001 guards against division by zero on the first frame,
            # when the window holds a single timestamp.
            fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
            draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

            cv2.imshow('object detection', cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The original never released the capture device; do it on every
        # exit path, then close the preview window.
        cam.release()
        cv2.destroyAllWindows()
def main():
    """Run live object detection or image classification on a Pi camera.

    Each frame is captured with picamera into a fresh in-memory JPEG
    stream, decoded with PIL, annotated, and shown in a cv2 window until
    'q' is pressed.

    Command-line flags:
      --model   Path of the detection model (required).
      --label   Path of the labels file (optional).
      --mode    OBJECT_DETECTION or IMAGE_CLASSIFICATION (required).
      --camera  Camera source index (optional; unused on this PiCamera path).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)
    args = parser.parse_args()

    # Initialize the inference engine matching the requested mode.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print(
            "Please insert the mode from OBJECT_DETECTION or IMAGE_CLASSIFICATION"
        )
        return  # nothing to clean up yet; a plain return ends the program

    labels = read_label_file(args.label) if args.label else None
    label = None

    # Initialize the Pi camera (the cv2.VideoCapture path is not used here,
    # so the --camera index is ignored).
    camera = PiCamera()
    time.sleep(2)  # let the sensor warm up before the first capture
    camera.resolution = (640, 480)

    # Rolling window of frame timestamps used to estimate FPS.
    frame_times = deque(maxlen=40)
    try:
        while True:
            # A fresh in-memory stream per frame so stale JPEG bytes from
            # the previous capture never leak into this one.
            stream = io.BytesIO()
            camera.capture(stream, format='jpeg', use_video_port=True)
            stream.seek(0)
            pil_im = Image.open(stream)
            cv2_im = np.array(pil_im)
            # PIL decodes to RGB; swap channels so cv2.imshow receives the
            # BGR order it expects.
            cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

            if args.mode == "OBJECT_DETECTION":
                ans = engine.DetectWithImage(pil_im,
                                             threshold=0.05,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=10)
                if ans:
                    for obj in ans:
                        if obj.score > 0.4:
                            if labels:
                                label = labels[obj.label_id] + " - {0:.2f}".format(
                                    obj.score)
                            draw_rectangles(obj.bounding_box, cv2_im, label=label)
                else:
                    draw_text(cv2_im, 'No object detected!')
            else:
                i = 0
                for result in engine.ClassifyWithImage(pil_im, top_k=5):
                    if result:
                        # --label is optional: fall back to the raw class id
                        # instead of crashing on labels=None.
                        label = labels[result[0]] if labels else str(result[0])
                        score = result[1]
                        draw_text(cv2_im, label, i)
                        i += 1
                    else:
                        draw_text(cv2_im, 'No classification detected!')

            lastInferenceTime = engine.get_inference_time()
            frame_times.append(time.time())
            # +0.001 guards against division by zero on the first frame.
            fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
            draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

            # Mirror the image. cv2.flip returns a new array; the original
            # discarded the result, which made the flip a silent no-op.
            cv2_im = cv2.flip(cv2_im, 1)

            cv2.imshow('object detection', cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The original called exit() before break and never closed the
        # camera; release hardware on every exit path.
        camera.close()
        cv2.destroyAllWindows()
def main():
    """Run live object detection or image classification, rendered via pygame.

    Frames are grabbed with OpenCV, converted to RGB (used both by the TPU
    engine and by pygame), annotated, and blitted into a pygame window.
    The loop exits when the window is closed.

    Command-line flags:
      --model   Path of the detection model (required).
      --label   Path of the labels file (optional).
      --mode    OBJECT_DETECTION or IMAGE_CLASSIFICATION (required).
      --camera  Camera source index (optional, defaults to 0).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)
    args = parser.parse_args()

    # Initialize the inference engine matching the requested mode.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print(
            "Please insert the mode from OBJECT_DETECTION or IMAGE_CLASSIFICATION"
        )
        return  # nothing to clean up yet; a plain return ends the program

    labels = read_label_file(args.label) if args.label else None
    label = None
    # Compare against None: `if args.camera` would wrongly discard an
    # explicit camera index of 0.
    camera = args.camera if args.camera is not None else 0

    # Initialize the camera.
    cam = cv2.VideoCapture(camera)

    # Initialize the pygame screen used to display annotated frames.
    BLACK = (0, 0, 0)
    WIDTH = 800
    HEIGHT = 600
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT), 0, 32)
    screen.fill(BLACK)

    # Rolling window of frame timestamps used to estimate FPS.
    frame_times = deque(maxlen=40)
    running = True
    try:
        while running:
            # Pump the event queue every frame: without it the window is
            # marked unresponsive on most platforms, and the original loop
            # had no exit path at all (its release() call after `while True`
            # was unreachable).
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
            if not running:
                break

            ret, cv2_im = cam.read()
            if not ret:
                # Camera did not deliver a frame (unplugged / bad index).
                print('Failed to read frame from camera.')
                break

            # OpenCV delivers BGR; convert once to RGB, which both the TPU
            # engine (via PIL) and pygame's "RGB" frombuffer mode expect.
            cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)

            if args.mode == "OBJECT_DETECTION":
                ans = engine.DetectWithImage(pil_im,
                                             threshold=0.05,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=10)
                if ans:
                    print("{} object(s) detected".format(len(ans)))
                    for obj in ans:
                        if obj.score > 0.5:
                            if labels:
                                label = labels[obj.label_id] + " - {0:.2f}".format(
                                    obj.score)
                            draw_rectangles(obj.bounding_box, cv2_im, label=label)
                else:
                    draw_text(cv2_im, 'No object detected!')
            else:
                i = 0
                for result in engine.ClassifyWithImage(pil_im, top_k=5):
                    if result:
                        # --label is optional: fall back to the raw class id
                        # instead of crashing on labels=None.
                        label = labels[result[0]] if labels else str(result[0])
                        score = result[1]
                        draw_text(cv2_im, label, i)
                        i += 1
                    else:
                        draw_text(cv2_im, 'No classification detected!')

            frame_times.append(time.time())
            # +0.001 guards against division by zero on the first frame.
            fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
            draw_text(cv2_im, "{:.1f}".format(fps))

            # Mirror the frame (webcam-as-mirror). cv2.flip returns a new
            # array; the original discarded the result, a silent no-op.
            cv2_im = cv2.flip(cv2_im, 1)

            # Blit the RGB frame into the pygame window. tobytes() replaces
            # the deprecated ndarray.tostring(); shape[1::-1] is (width,
            # height) as pygame expects.
            pygameimage = pygame.image.frombuffer(cv2_im.tobytes(),
                                                  cv2_im.shape[1::-1], "RGB")
            screen.blit(pygameimage, (0, 0))
            pygame.display.update()
    finally:
        # Release hardware and shut pygame down on every exit path.
        cam.release()
        pygame.quit()