def main():
    ##################################################
    # Initialise LCD
    ## LCD = LCD_1in8.LCD()
    LCD = ST7735()
    ## Lcd_ScanDir = LCD_1in8.SCAN_DIR_DFT
    ## LCD.LCD_Init(Lcd_ScanDir)
    LCD.begin()
    ## screenbuf = Image.new("RGB", (LCD.LCD_Dis_Column, LCD.LCD_Dis_Page), "WHITE")
    screenbuf = Image.new("RGB", (DISPLAY_WIDTH, DISPLAY_HEIGHT), "WHITE")
    draw = ImageDraw.Draw(screenbuf)
    draw.text((33, 22), 'LCD Demo', fill="BLUE", font=FONT_SMALL)
    ## LCD.LCD_PageImage(screenbuf)
    LCD.display(screenbuf)
    ##################################################

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of .tflite file.', required=True)
    parser.add_argument('--labels', help='File path of labels file.', required=True)
    args = parser.parse_args()

    labels = load_labels(args.labels)
    interpreter = Interpreter(args.model)
    interpreter.allocate_tensors()
    _, height, width, _ = interpreter.get_input_details()[0]['shape']

    cameraW = 640
    cameraH = 480
    frameTime = time.time() * 1000
    with picamera.PiCamera(resolution=(cameraW, cameraH), framerate=30) as camera:
        camera.start_preview(alpha=255)
        camera.annotate_foreground = Color('black')
        camera.annotate_background = Color('white')
        try:
            stream = io.BytesIO()
            for _ in camera.capture_continuous(stream, format='jpeg', use_video_port=True):
                stream.seek(0)
                image = Image.open(stream).convert('RGB').resize(
                    (width, height), Image.ANTIALIAS)
                start_time = time.time()
                results = classify_image(interpreter, image)
                elapsed_ms = (time.time() - start_time) * 1000
                stream.seek(0)
                stream.truncate()

                msg = ""
                for i in range(10):
                    label = labels[results[1][i] + 1]
                    prob = clamp(0, results[2][i], 1)
                    top = clamp(0, results[0][i][0], 1)
                    left = clamp(0, results[0][i][1], 1)
                    bottom = clamp(0, results[0][i][2], 1)
                    right = clamp(0, results[0][i][3], 1)
                    msg += (
                        "{0:20} {1:3.1f}% {2:3.3f} {3:3.3f} {4:3.3f} {5:3.3f} {6: 5.1f}ms\n"
                        .format(label, prob * 100, top, left, bottom, right, elapsed_ms))

                draw.rectangle([(0, 0), (160, 128)], fill="WHITE")
                ## LCD.LCD_PageImage(screenbuf)
                screenbuf.paste(image.resize((DISPLAY_WIDTH, DISPLAY_HEIGHT)))
                # draw.rectangle([(0,0),(160,128)], outline = "RED")
                draw.text((0, 0), msg, fill="BLUE", font=FONT_SMALL)
                LCD.display(screenbuf)

                msg += ("--------------------------------------------------\n")
                print(msg)
                #pdb.set_trace()

                bestIdx = np.argmax(results[2])
                label = labels[results[1][bestIdx] + 1]
                prob = clamp(0, results[2][bestIdx], 1)
                top = clamp(0, results[0][bestIdx][0], 1)
                left = clamp(0, results[0][bestIdx][1], 1)
                bottom = clamp(0, results[0][bestIdx][2], 1)
                right = clamp(0, results[0][bestIdx][3], 1)
                camera.annotate_text = '%s (%.1f%%)\n%.1fms' % (
                    label, prob * 100, elapsed_ms)
        finally:
            camera.stop_preview()
            LCD.LCD_Clear()
class ObjectDetectorLite:
    def __init__(self, model_path, label_path):
        """
        Builds Tensorflow graph, load model and labels
        """
        # Load label_map
        self._load_label(label_path)

        # Define lite graph and Load Tensorflow Lite model into memory
        self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get input size
        input_shape = self.input_details[0]['shape']
        self.size = input_shape[:2] if len(input_shape) == 3 else input_shape[1:3]

    def get_input_size(self):
        return self.size

    def detect(self, image, threshold=0.1):
        """
        Predicts person in frame with threshold level of confidence
        Returns list with top-left, bottom-right coordinates and list with labels, confidence in %
        """
        # Add a batch dimension
        frame = np.expand_dims(image, axis=0)

        # Run model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        self.interpreter.invoke()

        # Get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])

        # Find detected boxes coordinates
        return self._boxes_coordinates(image,
                                       np.squeeze(boxes[0]),
                                       np.squeeze(classes[0] + 1).astype(np.int32),
                                       np.squeeze(scores[0]),
                                       min_score_thresh=threshold)

    def close(self):
        pass

    def _boxes_coordinates(self, image, boxes, classes, scores,
                           max_boxes_to_draw=20, min_score_thresh=.5):
        """
        This function groups boxes that correspond to the same location
        and creates a display string for each detection.

        Args:
            image: uint8 numpy array with shape (img_height, img_width, 3)
            boxes: a numpy array of shape [N, 4]
            classes: a numpy array of shape [N]
            scores: a numpy array of shape [N] or None. If scores=None, then
                this function assumes that the boxes to be plotted are groundtruth
                boxes and plots all boxes as black with no classes or scores.
            max_boxes_to_draw: maximum number of boxes to visualize. If None, draw all boxes.
            min_score_thresh: minimum score threshold for a box to be visualized
        """
        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])

        detected_boxes = []
        probabilities = []
        categories = []

        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                detected_boxes.append(box)
                probabilities.append(scores[i])
                categories.append(self.category_index[classes[i]])

        return np.array(detected_boxes), probabilities, categories

    def _load_label(self, path):
        """
        Loads labels
        """
        categories = load_labelmap(path)
        self.category_index = create_category_index(categories)
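# A minimal usage sketch (not part of the original snippet) for ObjectDetectorLite above;
# the model/label paths and the dummy frame are illustrative assumptions only.
detector = ObjectDetectorLite('detect.tflite', 'labelmap.txt')  # hypothetical paths
in_h, in_w = detector.get_input_size()
dummy_frame = np.zeros((in_h, in_w, 3), dtype=np.uint8)  # stand-in for a resized camera frame
boxes, probabilities, categories = detector.detect(dummy_frame, threshold=0.5)
for box, prob, category in zip(boxes, probabilities, categories):
    print(category, prob, box)  # normalized [ymin, xmin, ymax, xmax] per detection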
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): # Usage: # PyTorch: weights = *.pt # TorchScript: *.torchscript # ONNX Runtime: *.onnx # ONNX OpenCV DNN: *.onnx with --dnn # OpenVINO: *.xml # CoreML: *.mlmodel # TensorRT: *.engine # TensorFlow SavedModel: *_saved_model # TensorFlow GraphDef: *.pb # TensorFlow Lite: *.tflite # TensorFlow Edge TPU: *_edgetpu.tflite from models.experimental import attempt_download, attempt_load # scoped to avoid circular import super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type( w) # get backend stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults w = attempt_download(w) # download if not local if data: # data.yaml path (optional) with open(data, errors='ignore') as f: names = yaml.safe_load(f)['names'] # class names if pt: # PyTorch model = attempt_load(weights if isinstance(weights, list) else w, map_location=device) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr( model, 'module') else model.names # get class names self.model = model # explicitly assign for to(), cpu(), cuda(), half() elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata model = torch.jit.load(w, _extra_files=extra_files) if extra_files['config.txt']: d = json.loads(extra_files['config.txt']) # extra_files dict stride, names = int(d['stride']), d['names'] elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4', )) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') cuda = torch.cuda.is_available() check_requirements( ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) import onnxruntime providers = ['CUDAExecutionProvider', 'CPUExecutionProvider' ] if cuda else ['CPUExecutionProvider'] session = onnxruntime.InferenceSession(w, providers=providers) elif xml: # OpenVINO LOGGER.info(f'Loading {w} for OpenVINO inference...') check_requirements( ('openvino-dev', ) ) # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.inference_engine as ie core = ie.IECore() if not Path(w).is_file(): # if not *.xml w = next(Path(w).glob( '*.xml')) # get *.xml file from *_openvino_model dir network = core.read_network( model=w, weights=Path(w).with_suffix('.bin')) # *.xml, *.bin paths executable_network = core.load_network(network, device_name='CPU', num_requests=1) elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) trt_fp16_input = False logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) bindings = OrderedDict() for index in range(model.num_bindings): name = model.get_binding_name(index) dtype = trt.nptype(model.get_binding_dtype(index)) shape = tuple(model.get_binding_shape(index)) data = torch.from_numpy(np.empty( shape, dtype=np.dtype(dtype))).to(device) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) if model.binding_is_input(index) and dtype == np.float16: trt_fp16_input = True 
binding_addrs = OrderedDict( (n, d.ptr) for n, d in bindings.items()) context = model.create_execution_context() batch_size = bindings['images'].shape[0] elif coreml: # CoreML LOGGER.info(f'Loading {w} for CoreML inference...') import coremltools as ct model = ct.models.MLModel(w) else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) if saved_model: # SavedModel LOGGER.info( f'Loading {w} for TensorFlow SavedModel inference...') import tensorflow as tf keras = False # assume TF1 saved_model model = tf.keras.models.load_model( w) if keras else tf.saved_model.load(w) elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt LOGGER.info( f'Loading {w} for TensorFlow GraphDef inference...') import tensorflow as tf def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function( lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped ge = x.graph.as_graph_element return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) gd = tf.Graph().as_graph_def() # graph_def gd.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0") elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu from tflite_runtime.interpreter import Interpreter, load_delegate except ImportError: import tensorflow as tf Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime LOGGER.info( f'Loading {w} for TensorFlow Lite Edge TPU inference...' ) delegate = { 'Linux': 'libedgetpu.so.1', 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll' }[platform.system()] interpreter = Interpreter( model_path=w, experimental_delegates=[load_delegate(delegate)]) else: # Lite LOGGER.info( f'Loading {w} for TensorFlow Lite inference...') interpreter = Interpreter( model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs elif tfjs: raise Exception( 'ERROR: YOLOv5 TF.js inference is not supported') self.__dict__.update(locals()) # assign all variables to self
def load_wakeword_model(model_file: str) -> Interpreter:
    interpreter = Interpreter(model_file)
    interpreter.allocate_tensors()
    return interpreter
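# A minimal sketch of driving the interpreter returned by load_wakeword_model; the helper name
# and the single-feature-array input layout are assumptions, not part of the original source.
def run_wakeword_model(interpreter: Interpreter, features: np.ndarray) -> np.ndarray:
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    # Add a batch dimension and match the model's expected dtype before inference
    batch = np.expand_dims(features, axis=0).astype(input_details['dtype'])
    interpreter.set_tensor(input_details['index'], batch)
    interpreter.invoke()
    return interpreter.get_tensor(output_details['index'])[0]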
def main():
    history = []
    state = ''
    previous_state = ''

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model',
        help='File path of .tflite file.',
        default=os.path.join(dirname, 'model/model.tflite'))
    parser.add_argument(
        '--labels',
        help='File path of labels file.',
        default=os.path.join(dirname, 'model/labels.txt'))
    args = parser.parse_args()

    labels = load_labels(args.labels)
    interpreter = Interpreter(args.model)
    interpreter.allocate_tensors()
    _, height, width, _ = interpreter.get_input_details()[0]['shape']

    with picamera.PiCamera(resolution=(600, 600), framerate=30) as camera:
        camera.iso = 100
        camera.vflip = 1
        camera.hflip = 1
        camera.crop = (0.0, 0.3, 0.7, 0.5)
        #camera.exposure_mode = "sport"
        camera.exposure_compensation = 0
        camera.exposure_mode = 'auto'
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for _ in camera.capture_continuous(
                    stream, format='jpeg', use_video_port=True):
                stream.seek(0)
                image = Image.open(stream).convert('RGB').resize((width, height),
                                                                 Image.ANTIALIAS)
                start_time = time.time()
                results = classify_image(interpreter, image)
                elapsed_ms = (time.time() - start_time) * 1000
                label_id, prob = results[0]
                stream.seek(0)
                stream.truncate()
                prob = int(round(prob, 2) * 100)
                if prob > 70:
                    history.append(labels[label_id])
                    history = history[-5:]
                    print(history)
                    if history.count(history[0]) == len(history):
                        state = history[0]
                        # Assume the detection can be trusted when the recent detections all agree
                        if previous_state != state:
                            if state == 'publicite':
                                print("muting the sound")
                                os.system('/usr/local/bin/irsend SEND_ONCE TV KEY_MUTE -#2')
                                time.sleep(0.1)
                                os.system('/usr/local/bin/irsend SEND_ONCE TV KEY_VOLUMEDOWN -#2')
                            elif previous_state == 'publicite':
                                print("restoring the sound")
                                os.system('/usr/local/bin/irsend SEND_ONCE TV KEY_MUTE -#2')
                                time.sleep(0.1)
                                os.system('/usr/local/bin/irsend SEND_ONCE TV KEY_VOLUMEUP -#2')
                            previous_state = state
                sys.stdout.flush()
                camera.annotate_text = '%s\nscore:%s%%' % (labels[label_id], prob)
        finally:
            GPIO.output(40, GPIO.LOW)
            print("done")
            camera.stop_preview()
def get_item_dictionary(): # Define and parse input arguments parser = argparse.ArgumentParser() # parser.add_argument('--modeldir', help='Folder the .tflite file is located in', # required=True) parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None) parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None) parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = "Sample_TFLite_model" GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) use_TPU = args.edgetpu # Parse input image name and directory. IM_NAME = args.image IM_DIR = args.imagedir # If both an image AND a folder are specified, throw an error if (IM_NAME and IM_DIR): print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.') sys.exit() # If neither an image or a folder are specified, default to using 'test1.jpg' for image name if (not IM_NAME and not IM_DIR): import picamera # print("about to take a photo") with picamera.PiCamera() as camera: camera.resolution = (1280,720) camera.capture("/home/pi/Desktop/tflite1/test_picam.jpg") # print("taken photo") IM_NAME = '/home/pi/Desktop/tflite1/CS190_P2/test_image.jpg' # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames if IM_DIR: PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR) images = glob.glob(PATH_TO_IMAGES + '/*') elif IM_NAME: PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME) images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/detect.tflite' # Path to label map file PATH_TO_LABELS = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/labelmap.txt' # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. 
if labels[0] == '???': del(labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 result_label = [] # Loop over every image and perform detection for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = cv2.imread(image_path) image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'],input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(boxes[i][0] * imH))) xmin = int(max(1,(boxes[i][1] * imW))) ymax = int(min(imH,(boxes[i][2] * imH))) xmax = int(min(imW,(boxes[i][3] * imW))) cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2) # Draw label object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index if object_name == 'potted plant': object_name = 'pineapple' label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%' result_label.append(object_name) labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # All the results have been drawn on the image, now display the image # cv2.imshow('Object detector', image) cv2.imwrite("detected_test.jpg", image) print(Counter(result_label)) # Press any key to continue to next image, or press 'q' to quit # # if cv2.waitKey(0) == ord('q'): # break # Clean up # cv2.destroyAllWindows() return dict(Counter(result_label))
def objectsCount(MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold, use_TPU,
                 IM_NAME, IM_DIR):
    import os
    import cv2
    import numpy as np
    import sys
    import glob
    import importlib.util

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print(
            'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.'
        )
        sys.exit()

    # If neither an image nor a folder is specified, default to using 'test1.jpg' for image name
    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')
    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    objects_list = {}  # dictionary mapping each image path to the number of cars detected

    # Loop over every image and perform detection
    for image_path in images:

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        objects_count = 0  # initialize detected object count

        # Loop over all detections and count cars whose confidence is above the minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
                # Look up object name from "labels" array using class index
                object_name = labels[int(classes[i])]
                if (object_name == 'car'):
                    objects_count = objects_count + 1  # count the cars detected in the image

        objects_list[image_path] = objects_count

    return (objects_list)
def setup_ssd_edgetpu(modelParas):
    # Get Args
    MODEL_NAME = modelParas[0]
    GRAPH_NAME = modelParas[1]
    LABELMAP_NAME = modelParas[2]
    min_conf_threshold = float(modelParas[3])
    resW, resH = modelParas[4:6]
    imW, imH = int(resW), int(resH)
    use_TPU = modelParas[6]

    # Import TensorFlow libraries
    # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    tfParas = [
        height, width, floating_model, labels, input_mean, input_std,
        input_details, min_conf_threshold, imH, imW, interpreter, output_details
    ]
    return tfParas
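# A short sketch (an assumption, not from the source) of unpacking the tfParas list returned
# by setup_ssd_edgetpu into named variables; the modelParas values below are hypothetical.
modelParas = ['Sample_TFLite_model', 'detect.tflite', 'labelmap.txt', 0.5, 1280, 720, False]
(height, width, floating_model, labels, input_mean, input_std,
 input_details, min_conf_threshold, imH, imW, interpreter, output_details) = setup_ssd_edgetpu(modelParas)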
self.stopped = True

imW, imH = 640, 480
CWD_PATH = os.getcwd()
EDGE_TPU = False

if EDGE_TPU:
    face_model_path = 'model/face-detector-quantized_edgetpu.tflite'
    face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path),
                                   experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
else:
    face_model_path = 'model/face_detection_front.tflite'
    face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path))

face_interpreter.allocate_tensors()

# Get model details
face_input_details = face_interpreter.get_input_details()[0]
face_output_details = face_interpreter.get_output_details()
height = face_input_details['shape'][1]
width = face_input_details['shape'][2]

# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()

# Initialize video stream
videostream = VideoStream(resolution=(imW, imH)).start()
time.sleep(1)

anchors = np.load('anchors.npy')
def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--model', help='File path of .tflite file.', required=True) parser.add_argument('--labels', help='File path of labels file.', required=True) args = parser.parse_args() labels = load_labels(args.labels) interpreter = Interpreter(args.model) interpreter.allocate_tensors() _, height, width, _ = interpreter.get_input_details()[0]['shape'] s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) host = socket.gethostbyname(socket.gethostname()) print(host) port = 9081 s.bind((host, port)) print('Starting server on', host, port) print('The Web server URL for this would be http://%s:%d/' % (host, port)) s.listen(5) c, (client_host, client_port) = s.accept() print('Got connection from', client_host, client_port) backSub = cv2.createBackgroundSubtractorKNN() # backSub = cv2.createBackgroundSubtractorMOG2() top, right, bottom, left = 20, 150, 400, 450 count = 0 with picamera.PiCamera(resolution=(640, 480)) as camera: # with picamera.PiCamera(framerate=30) as camera: camera.start_preview() # rawcap = PiRGBArray(camera, size=(640,480)) try: stream = io.BytesIO() for frame in camera.capture_continuous(stream, format='jpeg', use_video_port=True): stream.seek(0) # image1 = frame.array # cv2.imshow("kk",image1) # key = cv2.waitKey(1) & 0xFF # rawCapture.truncate(0) # if key == ord("q"): # break image1 = np.array(Image.open(stream).convert('RGB')) # cv2.imwrite(r"/home/pi/Desktop/casp/img%d.jpg"%count,image1) # image1 = cv2.imread(image2) roi = image1[top:bottom, right:left] # cv2.imwrite(r"/home/pi/Desktop/casp/roi%d.jpg"%count,roi) count += 1 image2 = backSub.apply(cv2.flip(roi, 1)) # image2 = cv2.resize(image2,(30,30)) # cv2.imshow("kk",image2) # image3 = backSub.apply(roi) image2 = cv2.resize(image2, (width, height)) image2 = cv2.cvtColor(image2, cv2.COLOR_GRAY2BGR) cv2.imwrite(r"/home/pi/Desktop/casp/img%d.jpg" % count, image2) img = np.expand_dims(image2, axis=0) img = (np.float32(img) - 127.5) / 127.5 # cv2.imshow("kk",image) start_time = time.time() results = classify_image(interpreter, img) elapsed_ms = (time.time() - start_time) * 1000 label_id, prob = results[0] print(label_id) stream.seek(0) stream.truncate() try: kk = d1[str(label_id)] except: kk = '' c.sendall(str.encode("HTTP/1.0 200 OK\n", 'iso-8859-1')) c.sendall(str.encode('Content-Type: text/html\n', 'iso-8859-1')) c.send(str.encode('\r\n')) str1 = b"<html><body style='background-color:black;'><style>.center {margin: 0;position: absolute;top: 50%;left: 50%;-ms-transform: translate(-50%, -50%);transform: translate(-50%, -50%);}</style><font color='red' size='+7'><div class='center'><h1><center>" str2 = b"</center></h1></div></body></font></html>" str3 = str.encode(kk) c.send(str1 + str3 + str2) time.sleep(0.1) c.send( b'<script type="text/javascript">document.body.innerHTML = "";</script>' ) finally: camera.stop_preview() c.close()
class Detection: def __init__(self): self.MODEL_NAME = "detect" self.GRAPH_NAME = "detect.tflite" self.LABELMAP_NAME = "label_map.txt" self.min_conf_threshold = 0.70 self.resW, self.resH = (1280, 720) self.imW, self.imH = int(self.resW), int(self.resH) # self.use_TPU = (True if 'projects' in str(os.getcwd()) else False) self.use_TPU = False self.frame_rate_calc = None self.item_detected = False self.latest_item = None self.detection_counter = [ { "name": "apple", "counter": 0 }, { "name": "aubergine", "counter": 0 }, { "name": "banana", "counter": 0 }, { "name": "broccoli", "counter": 0 }, { "name": "cucumber", "counter": 0 }, { "name": "orange", "counter": 0 }, { "name": "paprika", "counter": 0 }, { "name": "pear", "counter": 0 } ] # Import TFLite requirements self.pkg = importlib.util.find_spec('tflite_runtime') if self.pkg: from tflite_runtime.interpreter import Interpreter if self.use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if self.use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if self.use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (self.GRAPH_NAME == 'detect.tflite'): self.GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() PATH_TO_CKPT = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.GRAPH_NAME) PATH_TO_LABELS = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format( self.LABELMAP_NAME) PATH_TO_OBJ_NAMES = "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names" # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.labels = [line.strip() for line in f.readlines()] # Fix for potential label map issue if self.labels[0] == '???': del (self.labels[0]) if self.use_TPU: self.interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: self.interpreter = Interpreter(model_path=PATH_TO_CKPT) self.interpreter.allocate_tensors() print("Model loaded and tensors allocated") # Get model details self.input_details = self.interpreter.get_input_details() #print("Input details: {}".format(self.input_details)) self.output_details = self.interpreter.get_output_details() #print("Output detais: {}".format(self.output_details)) self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) self.input_mean = 127.5 self.input_std = 127.5 # Initialize frame rate calculation self.frame_rate_calc = 1 self.freq = cv2.getTickFrequency() # Initialize video stream self.videostream = VideoStream(resolution=(self.imW, self.imH)) self.videostream = self.videostream.start() def filter_boxes(self, box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])): scores_max = tf.math.reduce_max(scores, axis=-1) mask = scores_max >= score_threshold class_boxes = tf.boolean_mask(box_xywh, mask) pred_conf = tf.boolean_mask(scores, mask) class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]]) pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]]) box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1) input_shape = tf.cast(input_shape, dtype=tf.float32) box_yx = box_xy[..., ::-1] box_hw = box_wh[..., ::-1] box_mins = (box_yx - (box_hw / 2.)) / 
input_shape box_maxes = (box_yx + (box_hw / 2.)) / input_shape boxes = tf.concat([ box_mins[..., 0:1], # y_min box_mins[..., 1:2], # x_min box_maxes[..., 0:1], # y_max box_maxes[..., 1:2] # x_max ], axis=-1) # return tf.concat([boxes, pred_conf], axis=-1) return (boxes, pred_conf) def read_class_names(self, class_file_name): names = {} with open(class_file_name, 'r') as data: for ID, name in enumerate(data): names[ID] = name.strip('\n') return names # TODO: Definde cfg.YOLO.CLASSES def draw_bbox(self, image, bboxes, classes, show_label=True): num_classes = len(classes) image_h, image_w, _ = image.shape hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)] colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) random.seed(0) random.shuffle(colors) random.seed(None) out_boxes, out_scores, out_classes, num_boxes = bboxes for i in range(num_boxes[0]): if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes: continue coor = out_boxes[0][i] coor[0] = int(coor[0] * image_h) coor[2] = int(coor[2] * image_h) coor[1] = int(coor[1] * image_w) coor[3] = int(coor[3] * image_w) fontScale = 0.5 score = out_scores[0][i] class_ind = int(out_classes[0][i]) bbox_color = colors[class_ind] bbox_thick = int(0.6 * (image_h + image_w) / 600) c1, c2 = (coor[1], coor[0]), (coor[3], coor[2]) cv2.rectangle(image, c1, c2, bbox_color, bbox_thick) if show_label: bbox_mess = '%s: %.2f' % (classes[class_ind], score) t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0] c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3) cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) # filled cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA) return image def perform(self): while True: t1 = cv2.getTickCount() frame1 = self.videostream.read() print("Frame read from stream") frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # frame_resized = cv2.resize(frame_rgb, (self.width, self.height)) # input_data = np.expand_dims(frame_resized, axis=0) image_data = cv2.resize(frame, (608, 608)) image_data = image_data / 255. images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) # Normalize pixel values if using a floating model (i.e. 
if model is non-quantized) # if self.floating_model: # input_data = (np.float32(input_data) - self.input_mean) / self.input_std # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'], images_data) print("Performing detection") self.interpreter.invoke() print("Detection performed") pred = [self.interpreter.get_tensor(self.output_details[i]['index']) for i in range(len(self.output_details))] boxes, pred_conf = self.filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([608, 608])) boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=0.3, # TODO: Make var score_threshold=0.3 # TODO: Make var ) pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()] class_names = self.read_class_names( "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names") print("Drawing bounding boxes") frame = self.draw_bbox(frame, pred_bbox, class_names) #frame = Image.fromarray(frame.astype(np.uint8)) # cv2.imshow('Object detector',frame.astype(np.uint8)) time.sleep(5) image = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB) if cv2.waitKey(1) == ord('x'): break if self.item_detected: break return self.item_detected, self.latest_item def run(self, cloud=False): #while True: # for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = self.videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (self.width, self.height)) if cloud: # TODO: Send image to cloud and get data back content_type = 'image/jpeg' headers = {'content-type': content_type} _, img_encoded = cv2.imencode('.jpg', frame_rgb) request_address = "http://a24dcb00998c.ngrok.io/api/detect" # send http request with image and receive response print("Sending image to cloud api and awaiting response") response = requests.post(request_address, data=img_encoded.tostring(), headers=headers) print("Response received:") print(json.loads(response.text)) else: input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. 
if model is non-quantized) if self.floating_model: input_data = (np.float32(input_data) - self.input_mean) / self.input_std # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'], input_data) #print("Detection started") self.interpreter.invoke() #print("Detection complete") # Retrieve detection results #print(self.output_details) boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding coordinates of objects classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects num = self.interpreter.get_tensor(self.output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) max_score = 0 # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)): # Specify that item has been detected #self.item_detected = True #if scores[i] > max_score: #max_score = scores[i] #self.latest_item = self.labels[int(classes[i])] # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * self.imH))) xmin = int(max(1, (boxes[i][1] * self.imW))) ymax = int(min(self.imH, (boxes[i][2] * self.imH))) xmax = int(min(self.imW, (boxes[i][3] * self.imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index self.increase_detection_counter(object_name, scores[i]) label = '%s: %d%%' % (object_name, int(scores[i] * 100)) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(self.frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # All the results have been drawn on the frame, so it's time to display it. 
cv2.imshow('Object detector', frame) if cv2.waitKey(1) == ord('x'): cv2.destroyAllWindows() #break # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / self.freq self.frame_rate_calc = 1 / time1 self.item_detected, self.latest_item = self.get_object_with_score_five() if self.item_detected: self.reset_detection_counter() return self.item_detected, self.latest_item def increase_detection_counter(self, detected_item, score): for object in self.detection_counter: if object["name"] == detected_item: object["counter"]+=score def get_object_with_score_five(self): max_score = 0 latest_object = "None" detected_object = False for object in self.detection_counter: if object["counter"] >= 5 and object["counter"] > max_score: latest_object = object["name"] detected_object = True max_score = object["counter"] return detected_object, latest_object def reset_detection_counter(self): self.detection_counter = [ { "name": "apple", "counter": 0 }, { "name": "aubergine", "counter": 0 }, { "name": "banana", "counter": 0 }, { "name": "broccoli", "counter": 0 }, { "name": "cucumber", "counter": 0 }, { "name": "orange", "counter": 0 }, { "name": "paprika", "counter": 0 }, { "name": "pear", "counter": 0 } ] def destroy(self): # Clean up cv2.destroyAllWindows() self.videostream.stop()
class ImageDetection:
    def __init__(self, modeldir):
        GRAPH_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, modeldir, GRAPH_NAME)
        PATH_TO_LABELS = os.path.join(CWD_PATH, modeldir, LABELMAP_NAME)

        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del (self.labels[0])

        self.min_conf_threshold = 0.6
        self.input_mean = 127.5
        self.input_std = 127.5

        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

    def detect(self, image_path):
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        im_h, im_w, _ = image.shape
        image_resized = cv2.resize(image_rgb, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)

        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]

        detect_text = ""
        for i in range(len(scores)):
            if self.min_conf_threshold < scores[i] <= 1.0:
                ymin = int(max(1, (boxes[i][0] * im_h)))
                xmin = int(max(1, (boxes[i][1] * im_w)))
                ymax = int(min(im_h, (boxes[i][2] * im_h)))
                xmax = int(min(im_w, (boxes[i][3] * im_w)))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                object_name = self.labels[int(classes[i])]
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                label_size, base_line = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                label_ymin = max(ymin, label_size[1] + 10)
                cv2.rectangle(
                    image, (xmin, label_ymin - label_size[1] - 10),
                    (xmin + label_size[0], label_ymin + base_line - 10),
                    (255, 255, 255), cv2.FILLED)
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                detect_text = detect_text + " " + object_name

        cv2.imshow('Detector', image)
        os.system('echo %s | festival --tts & ' % detect_text)
        sleep(5)
        cv2.destroyAllWindows()
        return
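# A brief usage sketch for ImageDetection, assuming a model directory containing detect.tflite
# and labelmap.txt; the directory and image paths below are hypothetical.
detector = ImageDetection('Sample_TFLite_model')
detector.detect('test1.jpg')  # draws boxes, speaks the detected labels via festival, then closes the window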
def detectPenKey(img): # parser = argparse.ArgumentParser() # parser.add_argument('--modeldir', help='Folder the .tflite file is located in', # default='models\\model_objDetec\\penKeyModel') # parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', # default='model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite') # parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', # default='labels.txt') # parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', # default=0.5) # parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', # default=None) # parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', # default=None) # parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', # action='store_true') # # args = parser.parse_args() listOfObjDetec = [] MODEL_NAME = "models\\model_objDetec\\penKeyModel" GRAPH_NAME = "model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite" LABELMAP_NAME = "labels.txt" min_conf_threshold = float(0.5) use_TPU = False # # Parse input image name and directory. # IM_NAME = args.image # IM_DIR = args.imagedir # # # If both an image AND a folder are specified, throw an error # if (IM_NAME and IM_DIR): # print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.') # sys.exit() # # # If neither an image or a folder are specified, default to using 'test1.jpg' for image name # if (not IM_NAME and not IM_DIR): # IM_NAME = 'keys11.jpg' # # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames # if IM_DIR: # PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR) # images = glob.glob(PATH_TO_IMAGES + '/*') # # elif IM_NAME: # PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME) # images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
# If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Loop over every image and perform detection # for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = img image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: print("hello") input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # getting label/class object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index print("detected:", object_name, ":", int(scores[i] * 100)) listOfObjDetec.append(object_name) # debug ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label # object_name = labels[int(classes[i])] # Look up object name from "labels" array using class label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(img, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(img, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text if listOfObjDetec: print(listOfObjDetec) objDict = dict(Counter(listOfObjDetec)) print(objDict) strg = "Detected " for i in objDict: print(i) strg += "" + str(objDict[i]) + " " + i + ", " print(strg) # All the results have been drawn on the image, now display the image cv2.imshow('Object detector', img) cv2.waitKey(0) cv2.destroyAllWindows() # Press any key to continue to next image, or press 'q' to quit return strg else: return "No Objects Detected"
def approximation(limit):
    detect = 0
    MODEL_NAME = 'obj_detection_tflite'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.6
    imW, imH = 1280, 720

    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    CWD_PATH = os.getcwd()
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]
    if labels[0] == '???':
        del (labels[0])

    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    pi_camera = PiCamera(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    p_height = 0
    p_width = 0
    detections = 0
    approximation_detected = False
    timer_mark = timer_start = time.time()

    while timer_mark - timer_start < limit:
        print(timer_mark - timer_start)
        frame1 = pi_camera.read()
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects

        for i in range(len(scores)):
            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                y_min = int(max(1, (boxes[i][0] * imH)))
                x_min = int(max(1, (boxes[i][1] * imW)))
                y_max = int(min(imH, (boxes[i][2] * imH)))
                x_max = int(min(imW, (boxes[i][3] * imW)))
                object_name = labels[int(classes[i])]
                if object_name == 'car' or object_name == 'bus' or object_name == 'truck':
                    detections += 1
                    if (y_max - y_min) > p_height * 1.15 or (x_max - x_min) > p_width * 1.15 \
                            and detections > 1:
                        play_sound_notification("waiting")
                        limit += 3
                    p_height = y_max - y_min
                    p_width = x_max - x_min

        timer_mark = time.time()

    cv2.destroyAllWindows()
    pi_camera.stop()
def main():
    # From here: assign the model path from the command line
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model',
                        help='File path of .tflite file.',
                        required=True)
    parser.add_argument('--labels',
                        help='File path of labels file.',
                        required=True)
    args = parser.parse_args()
    #pathLabels=('/home/pi/Documents/labels.txt')
    #pathTflite=('/home/pi/Documents/ultimo.tflite')

    labels = load_labels(args.labels)
    interpreter = Interpreter(args.model)
    interpreter.allocate_tensors()

    camera = PiCamera()
    camera.resolution = (640, 480)
    camera.framerate = 30
    rawCapture = PiRGBArray(camera, size=(640, 480))
    stream = io.BytesIO()

    for frame in camera.capture_continuous(rawCapture, format="bgr",
                                           use_video_port=True):
        image = frame.array
        frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        Imagen_normed = frame / 255
        Imagen_show = cv2.resize(image, (600, 600))
        Imagen_resized = cv2.resize(Imagen_normed, (224, 224))
        Imagen_espnaded = np.expand_dims(Imagen_resized, axis=0)

        start_time = time.time()
        results = clasificar_imagen(interpreter, Imagen_espnaded)
        elapsed_ms = round((time.time() - start_time) * 1000, 2)
        label_id, prob = results[0]
        print(labels[label_id] + " " + str(elapsed_ms))

        font = cv2.FONT_HERSHEY_SIMPLEX
        color = (14, 129, 60)
        images = cv2.putText(
            Imagen_show,
            str(elapsed_ms) + "ms " + labels[label_id] + " " + str(prob),
            (00, 100), font, 1, color, 2, cv2.LINE_AA)
        cv2.imshow("Frame", images)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        rawCapture.truncate(0)

    cv2.destroyAllWindows()
def main():
    # Input argument (array) setup ->
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', default='cnn.tflite', help='File path of .tflite file.')
    parser.add_argument('-i', '--input', default='dog.jpg', help='Image to be classified.')
    parser.add_argument('-l', '--labels', default='animal_labels.txt', help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1, help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0, help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5, help='Number of times to run inference')
    args = parser.parse_args()
    # Input argument (array) setup <-
    # Load labels
    labels = read_label_file(args.labels) if args.labels else {}
    # Load the model; the model file differs depending on whether a Coral accelerator is used
    interpreter = Interpreter(*args.model.split('@'))
    # Allocate memory for inference; must be called right after loading the model
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    # Convert the input file to RGB and resize it to the interpreter's input size
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)
    # Set the input image on the interpreter
    common.set_input(interpreter, image)
    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    # Repeat inference as many times as specified by the --count argument
    for _ in range(args.count):
        start = time.perf_counter()  # start timing the inference
        interpreter.invoke()  # run inference
        inference_time = time.perf_counter() - start  # stop timing the inference
        # Get the top args.top_k labels whose score is at least args.threshold
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))  # print the inference time
    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
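# Editor's usage note (paths and file names below are illustrative, not from the
# original project). A typical invocation of the script above:
#
#   python3 classify.py -m cnn.tflite -i dog.jpg -l animal_labels.txt -k 3 -c 5
#
# The --model value may also carry a device suffix such as model_edgetpu.tflite@usb,
# following the pycoral examples, which is presumably why the code splits on '@'
# before constructing the Interpreter.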
def process_frame(frame): global entry, lime_count, marker_count, lime_sizes, found_list, total_marker_width, pixel_per_metric interpreter = Interpreter(model_path=PATH_TO_CKPT, num_threads=4) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) #frame_rgb = frame frame_resized = cv2.resize(frame_rgb, (width, height)) #frame_resized = cv2.resize(frame_rgb, (480, 320)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input try: start_time = time.time() interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() elapsed_time.append(time.time() - start_time) except: print('Thread Error: interpreter not reference') # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # counting objects and measure diameter of lime if xmin < LINE2 and xmax > LINE1 and not entry: entry = True if entry and xmax <= LINE1: entry = False if (int(classes[i]) + 1 == 1): lime_found = time.time() - start_total_time try: lime_count += 1 lime_diameter = ( (xmax - xmin) + (ymax - ymin)) / (2 * pixel_per_metric) lime_sizes.append(lime_diameter) found_list.append(lime_found) print( f'lime {lime_count} is found at {lime_found}, Diameter(size): {lime_diameter * 1000:.3f} mm' ) except: # marker must came first for calculating pixel/metric lime_count -= 1 marker_count += 1 total_marker_width += ((xmax - xmin) + 
(ymax - ymin)) / 2 pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER elif (int(classes[i]) + 1 == 2): marker_count += 1 total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2 pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER # insert Lime Count information text font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText( frame, 'Lime Count: ' + str(lime_count), (10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.LINE_AA, ) # insert Marker Count information text cv2.putText( frame, 'Marker Count: ' + str(marker_count), (10, 55), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.LINE_AA, ) # overlay with line pt1 = (LINE1, 0) pt2 = (LINE1, int(sqsize)) cv2.line(frame, pt1, pt2, (0, 0, 255), 2) pt1 = (LINE2, 0) pt2 = (LINE2, int(sqsize)) cv2.line(frame, pt1, pt2, (0, 0, 255), 2) frame = cv2.resize(frame, (480, 320)) return frame
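# Editor's sketch of the pixel-per-metric arithmetic used above, with made-up numbers.
# A reference marker of known physical diameter calibrates the scale; limes are then
# measured in the same unit (metres here, printed as millimetres in the log line).
MARKER_DIAMETER = 0.024          # assumption for illustration: marker is 24 mm across
marker_width_px = 96.0           # mean of box width/height for the marker, in pixels
pixel_per_metric = marker_width_px / MARKER_DIAMETER    # 4000 px per metre

lime_box_w, lime_box_h = 180, 172                       # detected lime box, in pixels
lime_diameter = ((lime_box_w + lime_box_h) / 2) / pixel_per_metric
print(f'lime diameter: {lime_diameter * 1000:.1f} mm')  # -> 44.0 mm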
def main(): parser = argparse.ArgumentParser() parser.add_argument('--model', help='File path of .tflite file.', required=True) parser.add_argument('--labels', help='File path of labels file.', required=True) args = parser.parse_args() labels = load_labels(args.labels) interpreter = Interpreter(args.model) interpreter.allocate_tensors() _, height, width, _ = interpreter.get_input_details()[0]['shape'] cameraW = 640 cameraH = 480 def clampW(x): return clamp(0, x, cameraW - 1) def clampH(x): return clamp(0, x, cameraH - 1) ov = np.zeros((cameraH, cameraW, 3), dtype=np.uint8) ov[:, :, :] = 0 frameTime = time.time() * 1000 overlayInterval = 100 with picamera.PiCamera(resolution=(cameraW, cameraH), framerate=30) as camera: camera.start_preview(alpha=255) camera.annotate_foreground = Color('black') camera.annotate_background = Color('white') overlay = camera.add_overlay(ov.tobytes(), layer=3, alpha=64) try: stream = io.BytesIO() for _ in camera.capture_continuous(stream, format='jpeg', use_video_port=True): stream.seek(0) image = Image.open(stream).convert('RGB').resize( (width, height), Image.ANTIALIAS) start_time = time.time() results = classify_image(interpreter, image) elapsed_ms = (time.time() - start_time) * 1000 stream.seek(0) stream.truncate() bestIdx = np.argmax(results[2]) label = labels[results[1][bestIdx] + 1] prob = results[2][bestIdx] top = int(np.round(results[0][bestIdx][0] * cameraH)) left = int(np.round(results[0][bestIdx][1] * cameraW)) bottom = int(np.round(results[0][bestIdx][2] * cameraH)) right = int(np.round(results[0][bestIdx][3] * cameraW)) ov[:, :, :] = 0 if top >= 0 and top < cameraH: ov[top, clampW(left):clampW(right), :] = 0xff if bottom >= 0 and bottom < cameraH: ov[bottom, clampW(left):clampW(right), :] = 0xff if left >= 0 and left < cameraW: ov[clampH(top):clampH(bottom), left, :] = 0xff if right >= 0 and right < cameraW: ov[clampH(top):clampH(bottom), right, :] = 0xff if time.time() * 1000 - frameTime > overlayInterval: overlay.update(ov.tobytes()) frameTime = time.time() * 1000 #pdb.set_trace() camera.annotate_text = '%s (%.1f%%)\n%.1fms' % ( label, prob * 100, elapsed_ms) finally: camera.remove_overlay(overlay) camera.stop_preview()
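# Editor's sketch: `clamp` is defined elsewhere in the original file. Given how it is
# called above -- clamp(0, value, upper) -- it presumably bounds the middle argument
# to the inclusive range [lower, upper], e.g.:
def clamp(lower, value, upper):
    """Clamp `value` into the inclusive range [lower, upper]."""
    return max(lower, min(value, upper))

assert clamp(0, -5, 639) == 0
assert clamp(0, 1000, 639) == 639
assert clamp(0, 0.42, 1) == 0.42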
def object_detection(): label_out = [] mid_x_out = [] mid_y_out = [] class VideoStream: """Camera object that controls video streaming from the Picamera""" def __init__(self, resolution=(640, 480), framerate=30): # Initialize the PiCamera and the camera image stream self.stream = cv2.VideoCapture(0) ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) ret = self.stream.set(3, resolution[0]) ret = self.stream.set(4, resolution[1]) # Read first frame from the stream (self.grabbed, self.frame) = self.stream.read() # Variable to control when the camera is stopped self.stopped = False def start(self): # Start the thread that reads frames from the video stream Thread(target=self.update, args=()).start() return self def update(self): # Keep looping indefinitely until the thread is stopped while True: # If the camera is stopped, stop the thread if self.stopped: # Close camera resources self.stream.release() return # Otherwise, grab the next frame from the stream (self.grabbed, self.frame) = self.stream.read() def read(self): # Return the most recent frame return self.frame def stop(self): # Indicate that the camera and thread should be stopped self.stopped = True # Define and parse input arguments parser = argparse.ArgumentParser() parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True) parser.add_argument( '--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument( '--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument( '--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument( '--resolution', help= 'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.', default='1280x720') parser.add_argument( '--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = args.modeldir GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) resW, resH = args.resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = args.edgetpu # Import TensorFlow libraries # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tensorflow') if pkg is None: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH), framerate=30).start() time.sleep(1) #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): while True: flag = 0 # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) cv2.circle(frame, (xmin, ymin), 5, (255, 255, 0), cv2.FILLED) cv2.circle(frame, (xmax, ymax), 5, (0, 255, 255), cv2.FILLED) x_diff = xmax - xmin y_diff = ymax - ymin mid_x = x_diff / 2 + xmin mid_x = math.ceil(mid_x) mid_y = ymin + y_diff / 2 mid_y = math.ceil(mid_y) cv2.circle(frame, (0, 0), 5, (0, 0, 255), cv2.FILLED) cv2.circle(frame, (mid_x, mid_y), 5, (255, 255, 255), cv2.FILLED) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, 
(xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text label_out.append(label) mid_x_out.append(mid_x) mid_y_out.append(mid_y) # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # All the results have been drawn on the frame, so it's time to display it. #cv2.imshow('Object detector', frame) # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / freq frame_rate_calc = 1 / time1 (h, w) = frame.shape[:2] cv2.waitKey(100) break # Clean up cv2.destroyAllWindows() videostream.stop() return (label_out, mid_x_out, mid_y_out, h / 2, w / 2)
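# Editor's sketch of how a caller might consume the tuple returned by
# object_detection(): offsets of each detection's midpoint from the frame centre,
# e.g. for pointing or steering logic. The unpacking order mirrors the return above;
# the loop itself is illustrative, not original code.
labels_out, mids_x, mids_y, centre_y, centre_x = object_detection()
for name, mx, my in zip(labels_out, mids_x, mids_y):
    dx = mx - centre_x   # positive -> object right of centre
    dy = my - centre_y   # positive -> object below centre
    print(f'{name}: offset ({dx:+.0f}, {dy:+.0f}) px from centre')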
class DetectorTFLite: def __init__(self, path_to_checkpoint, path_to_labelmap, filter_labels=None): self.filter_labels = filter_labels with open(path_to_labelmap, 'r') as f: self.labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if self.labels[0] == '???': del (self.labels[0]) self.interpreter = Interpreter(model_path=path_to_checkpoint) self.interpreter.allocate_tensors() # Get model details self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() self.tf_height = self.input_details[0]['shape'][1] self.tf_width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) self.input_mean = 127.5 self.input_std = 127.5 def ExtractBoxes(self, imH, imW, boxes, classes, scores): det_boxes = [] for i in range(len(scores)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() miny = int(max(1, (boxes[i][0] * imH))) minx = int(max(1, (boxes[i][1] * imW))) maxy = int(min(imH, (boxes[i][2] * imH))) maxx = int(min(imW, (boxes[i][3] * imW))) label = self.labels[int(classes[i])] det_boxes.append((minx, miny, maxx, maxy, label, float(scores[i]))) return det_boxes def DetectFromImage(self, img): imH, imW, _ = img.shape # Acquire frame and resize to expected shape [1xHxWx3] frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (self.tf_width, self.tf_height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if self.floating_model: input_data = (np.float32(input_data) - self.input_mean) / self.input_std # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'], input_data) self.interpreter.invoke() # Retrieve detection results boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = self.interpreter.get_tensor(self.output_details[1]['index'])[ 0] # Class index of detected objects scores = self.interpreter.get_tensor(self.output_details[2]['index'])[ 0] # Confidence of detected objects return self.ExtractBoxes(imH, imW, boxes, classes, scores) def DisplayDetection(self, image, box, det_time=None): img = image.copy() x_min = box[0] y_min = box[1] x_max = box[2] y_max = box[3] cls = str(box[4]) score = str(np.round(box[-1], 2)) text = cls + ": " + score cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1) cv2.rectangle(img, (x_min, y_min - 20), (x_min, y_min), (255, 255, 255), -1) cv2.putText(img, text, (x_min + 5, y_min - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) if det_time != None: fps = round(1000. / det_time, 1) fps_txt = str(fps) + " FPS" cv2.putText(img, fps_txt, (25, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2) return img
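# Editor's sketch: typical single-image usage of the DetectorTFLite class above.
# The model/label paths, input file name, and 0.5 score cut-off are illustrative,
# not part of the original code.
import cv2

detector = DetectorTFLite('model/detect.tflite', 'model/labelmap.txt',
                          filter_labels=['person'])
img = cv2.imread('test.jpg')
boxes = detector.DetectFromImage(img)      # [(xmin, ymin, xmax, ymax, label, score), ...]
for box in boxes:
    if box[5] > 0.5:                       # keep confident detections only
        img = detector.DisplayDetection(img, box)
cv2.imwrite('test_out.jpg', img)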
def gen_frames(): # Define VideoStream class to handle streaming of video from webcam in separate processing thread # Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/ class VideoStream(object): """Camera object that controls video streaming from the Picamera""" def __init__(self,resolution=(640,480),framerate=30,target=None,args=()): global capture_image_limit capture_image_limit = 2000 global file_save_id file_save_id =0 # Initialize the PiCamera and the camera image stream self.stream = cv2.VideoCapture(0) #VideoStream Instance instance = VideoStream.__qualname__ print('The class instance is: ',instance) #print('\nVIDEOSTREAM: locals() value inside class\n', locals()) #print(dir(VideoStream)) #Reload reloadClass = os.environ.get('reload') if reloadClass == 'True': print('Delete Self:') del self os.environ['reload'] = 'False' ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) ret = self.stream.set(3,resolution[0]) ret = self.stream.set(4,resolution[1]) # Read first frame from the stream (self.grabbed, self.frame) = self.stream.read() # Variable to control when the camera is stopped self.stopped = False def __del__(self): print ("Object destroyed"); def start(self): # Start the thread that reads frames from the video stream Thread(target=self.update,args=()).start() return self def update(self): # Keep looping indefinitely until the thread is stopped while True: # If the camera is stopped, stop the thread if self.stopped: # Close camera resources self.stream.release() return # Otherwise, grab the next frame from the stream (self.grabbed, self.frame) = self.stream.read() def read(self): # Return the most recent frame this_instance = self return self.frame def stop(self): # Indicate that the camera and thread should be stopped self.stopped = True # Define and parse input arguments parser = argparse.ArgumentParser() parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True) parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument('--resolution', help='Desired webcam resolution in WxH. 
If the webcam does not support the resolution entered, errors may occur.', default='1280x720') parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = args.modeldir print('~~~~ Param Default Model Name: ' + str(MODEL_NAME)) GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) resW, resH = args.resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = args.edgetpu # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') print('TPU Runtime' + str(pkg)) if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory # Multi-Model # Demo90 /home/pi/SensorFusion/Demo90 # Deer: /home/pi/SensorFusion/PreLoadedModels/Model01.Deer # Head: /home/pi/SensorFusion/PreLoadedModels/Model02.Head # Eyes: /home/pi/SensorFusion/PreLoadedModels/Model03.Eyes # Tree: /home/pi/SensorFusion/PreLoadedModels/Model04.Tree # check.id - cd /home/pi/SensorFusion/checkid CWD_PATH = os.getcwd() print("Default Path: "+ CWD_PATH) newModel = str(os.environ.get('run_model')) print("New Model Name: "+ newModel) if newModel == "Demo90": CWD_PATH = "/home/pi/SensorFusion/"+ newModel elif newModel == 'Check.ID': CWD_PATH = "/home/pi/SensorFusion/checkid" else: CWD_PATH = "/home/pi/SensorFusion/PreLoadedModels/"+ newModel print("Current Model Path: "+ CWD_PATH) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME) print("Current Path to Label Map: "+ PATH_TO_LABELS) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del(labels[0]) # Load the Tensorflow Lite model. 
# If using Edge TPU, use special load_delegate argument #if video_camera_flag:#Using a Flag here - for future use if use_TPU: interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print('TPU Detected' + PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) print('No TPU detected!'+ PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW,imH),framerate=30).start() time.sleep(1) global img_counter img_counter = 0 #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): try: while True: #while video_camera_flag: # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] global frame frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'],input_data) interpreter.invoke() # Retrieve detection results person_found = False boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) #Kill TensofFlow while Annotating kill_tensorFlow = os.environ.get('kill_tensorFlow') #print("TensofFlow Status: " + str(kill_tensorFlow)) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(boxes[i][0] * imH))) xmin = int(max(1,(boxes[i][1] * imW))) ymax = int(min(imH,(boxes[i][2] * imH))) xmax = int(min(imW,(boxes[i][3] * imW))) #print("Kill TF Flag: "+ str(kill_tensorFlow)) if kill_tensorFlow != 'True': try: cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 3) except: pass # Draw label (object_name) and score (%) object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index #print(labels[int(classes[i])]+": "+str(i)) if labels[int(classes[0])]== 'person':#NOTE - The bar is for one person only #print('Person Found!') person_found = True# used for bar below scores_flag = os.environ.get('scores_flag') labels_flag = os.environ.get('labels_flag') #states state_ = 11 #both on by default if labels_flag == 'labels_off' and scores_flag == 
'scores_off': state_ = 0#00 label = object() if labels_flag == 'labels_on' and scores_flag == 'scores_on': state_ = 11#11 label = '%s: %d%%' % (object_name.capitalize(), int(scores[i]*100)) # Example: 'person: 72%' if labels_flag == 'labels_off' and scores_flag == 'scores_on': label = '%d%%' % (int(scores[i]*100)) # Example: '72%' state_ = 1#01 if labels_flag == 'labels_on' and scores_flag == 'scores_off': state_= 10 #10 label = '%s: ' % (object_name.capitalize()) # Example: 'person: ' #draw the labels, background score and box if state_ != 0: labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window #cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (237,237,237), cv2.FILLED) # Draw white box to put label text in if kill_tensorFlow != 'True': cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (128,128,128), cv2.FILLED) # Draw gray box to put label text in cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) # Draw label text else: if kill_tensorFlow != 'True': cv2.rectangle(frame, (xmin,ymin), (xmin,ymin), (237,237,237), cv2.FILLED) # Draw frame with no label OR score text ! # Draw framerate in corner of frame - use 'F' key to toggle on/off try: if fps_flag: cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA) else: pass except: pass #If Capture Image Draw status text capture_flag = os.environ.get('cap_flag') try: if capture_flag == "True": cv2.putText(frame,'Saving File: '+str(img_counter),(520,50),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,0,255),2) else: pass except: pass # All the results have been drawn on the frame, so it's time to display it. #cv2.imshow('Object detector', frame) ## Commented for the FLASK API #Module widgets.meter() if kill_tensorFlow != 'True': #window_name ='Object detector' top = int(scores[0]*100) color = (0,0,255) if person_found == True: widgets.meter(frame,top)#module #End Module # Displaying the image - DO NOT USE! #cv2.imshow(window_name, image) #SENSOR FUSION Flask VIDEO API #Brute Force Motion JPEG, OpenCV defaults to capture raw images, #so we must encode it into JPEG in order to correctly display the #video stream - NOTE need to work on this cv2.imencode tobytes slows the apparent frame rate by about 50%, plus the UI takes some #See: https://www.pyimagesearch.com/2017/02/06/faster-video-file-fps-with-cv2-videocapture-and-opencv/ ret, buffer = cv2.imencode('.jpg', frame) frame2 = buffer.tobytes() #the image that is saved #Capture Images and save to Annotate Named subdirectory under ~/Pictures #capture_flag = os.environ.get('cap_flag') annotate_name = os.environ.get('annotate_name') if capture_flag == 'True': #Check limit try: print("image limit: " + anno_images) capture_image_limit = int(anno_images) except: pass if capture_flag == 'True' and img_counter < capture_image_limit: #Create new or use existing directory path_to_directory = '../Pictures/' + annotate_name print("Saving to ", path_to_directory) try: os.makedirs(path_to_directory) except FileExistsError: #dir already exists, so overwrite existing (unless we datestamp)! 
pass img_name="../Pictures/"+annotate_name+"/"+annotate_name+"sf-frame_{}.jpg".format(img_counter) cv2.namedWindow("Capture Window") cv2.moveWindow("Capture Window", -500, -500)# push it off screen :) cv2.imwrite(img_name, frame1) print('Wrote Image-'+ img_name) img_counter +=1 #Clear Capture Flag when done grabbing images if capture_flag == 'True' and img_counter >= capture_image_limit: os.environ['cap_flag'] = 'False' img_counter = 0 yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + frame2 + b'\r\n') # concat frame one by one and show result ## End Video Stream API ### # Calculate framerate t2 = cv2.getTickCount() time1 = (t2-t1)/freq frame_rate_calc= 1/time1 # Press 'q' to quit if cv2.waitKey(1) == ord('q'): print("CV2 Break") break # Press 'q' to quit quit_flag = os.environ.get('quit_flag') if quit_flag == 'quit':# os.environ['quit_flag'] = '' print("CV2 Quit " + quit_flag) cv2.destroyAllWindows() if videostream: #videostream.release() videostream.stop() print('Videostream stopped') break #print("quit_flag " + str(quit_flag)) # Clean up cv2.destroyAllWindows() if videostream: #videostream.release() videostream.stop() #os.system("pkill chromium") #webbrowser.open('http://localhost:5000', new=0) except KeyboardInterrupt: pass
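# Editor's sketch: gen_frames() above yields multipart JPEG chunks, which is the
# format the standard Flask MJPEG streaming pattern expects. A typical route wiring
# (app name and URL are assumptions, not from the original file) looks like:
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # Each yielded chunk is one '--frame' part containing a JPEG image.
    return Response(gen_frames(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)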
class YOLOV5: def __init__(self, wanted_labels=None, model_file=None, label_file=None, num_threads=None, edgetpu=False, libedgetpu=None, score_threshold=0.25): basedir = os.getenv('DEEPDISHHOME','.') if model_file is None: model_file = os.path.join(basedir, 'detectors/yolov5/yolov5s-int8.tflite') if label_file is None: label_file = os.path.join(basedir, 'detectors/yolov5/coco_classes.txt') self.cfg_file = os.path.join(basedir, 'detectors/yolov5/yolov5s.yaml') if wanted_labels is None: wanted_labels = ['person'] self.wanted_labels = wanted_labels self.label_file = label_file self.score_threshold = score_threshold self.labels = self._get_labels() self.use_edgetpu = edgetpu self.int8 = False if 'saved_model' in model_file: self.mode = 'saved_model' if 'keras' not in sys.modules: print('yolov5: saved_model mode requires keras') sys.exit(1) elif '.tflite' in model_file: self.mode = 'tflite' if 'int8' in model_file: self.int8 = True else: print('unable to determine format of yolov5 model') sys.exit(1) if libedgetpu is None: libedgetpu = edgetpu_lib_name() if self.mode == 'tflite': # Load TFLite model and allocate tensors. self.interpreter = Interpreter( model_path=model_file, num_threads=num_threads, experimental_delegates=[load_delegate(libedgetpu)] if self.use_edgetpu else None) self.interpreter.allocate_tensors() self.num_threads = num_threads # Get input and output tensors. self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() _, self.height, self.width, _ = self.input_details[0]['shape'].tolist() elif self.mode == 'saved_model': self.model = keras.models.load_model(model_file) self.num_threads = 1 _, self.height, self.width, _ = self.model.inputs[0].shape.as_list() yaml_file = Path(self.cfg_file) with open(yaml_file) as f: cfg = yaml.load(f, Loader=yaml.FullLoader) self.anchors = cfg['anchors'] def _get_labels(self): labels_path = os.path.expanduser(self.label_file) with open(labels_path) as f: labels = {i: line.strip() for i, line in enumerate(f.readlines())} return labels def detect_image(self, img): img_size = img.size img_resized = img.convert('RGB').resize((self.width, self.height), Image.ANTIALIAS) input_data = np.expand_dims(img_resized, 0).astype(np.float32) if self.int8: scale, zero_point = self.input_details[0]['quantization'] input_data = (input_data / scale + zero_point).astype(np.uint8) if self.mode == 'tflite': self.interpreter.set_tensor(self.input_details[0]['index'], input_data) self.interpreter.invoke() output_data = self.interpreter.get_tensor(self.output_details[0]['index']) raw = np.copy(output_data) elif self.mode == 'saved_model': input_data /= 255.0 output_data = self.model(input_data).numpy() if self.int8: scale, zero_point = self.output_details[0]['quantization'] output_data = output_data.astype(np.float32) output_data = (output_data - zero_point) * scale x = np.copy(output_data) boxes = np.copy(x[..., :4]) boxes[..., 0] = x[..., 0] - x[..., 2] / 2 boxes[..., 1] = x[..., 1] - x[..., 3] / 2 boxes[..., 2] = x[..., 0] + x[..., 2] / 2 boxes[..., 3] = x[..., 1] + x[..., 3] / 2 x[..., 5:] *= x[..., 4:5] best_classes = np.expand_dims(np.argmax(x[..., 5:], axis=-1), axis=-1) confidences = np.take_along_axis(x, best_classes + 5, axis=-1) y = np.concatenate((boxes, confidences, best_classes.astype(np.float32)), axis=-1) y = y[np.where(y[..., 4] >= self.score_threshold)] y[...,:4] *= np.array([img_size[0], img_size[1], img_size[0], img_size[1]]) return_boxs = [] return_lbls = [] return_scrs = [] for *xyxy, score, 
labelidx in y: label=self.labels[int(labelidx)] if label in self.wanted_labels and score >= self.score_threshold: tlwh = np.copy(xyxy) tlwh[2] = xyxy[2] - xyxy[0] tlwh[3] = xyxy[3] - xyxy[1] return_boxs.append(list(tlwh)) return_lbls.append(label) return_scrs.append(score) return (return_boxs, return_lbls, return_scrs)
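# Editor's note: detect_image() above thresholds on score but does not apply
# non-maximum suppression, so overlapping boxes for the same object can survive.
# A minimal IoU-based NMS that could optionally be run on the returned
# (tlwh box, score) lists is sketched below; it is not part of the original class.
import numpy as np

def nms_tlwh(boxes, scores, iou_thresh=0.45):
    """boxes: list of [x, y, w, h]; returns indices of boxes to keep."""
    if not boxes:
        return []
    b = np.asarray(boxes, dtype=np.float32)
    x1, y1 = b[:, 0], b[:, 1]
    x2, y2 = b[:, 0] + b[:, 2], b[:, 1] + b[:, 3]
    areas = b[:, 2] * b[:, 3]
    order = np.argsort(scores)[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]   # drop boxes overlapping the kept one
    return keep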
def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--model', help='File path of .tflite file.', required=True) parser.add_argument('--labels', help='File path of labels file.', required=True) parser.add_argument('--threshold', help='Score threshold for detected objects.', required=False, type=float, default=0.4) # originally 0.4 args = parser.parse_args() labels = load_labels(args.labels) interpreter = Interpreter( args.model, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) #coral interpreter.allocate_tensors() _, input_height, input_width, _ = interpreter.get_input_details( )[0]['shape'] # initialize variables to calculate FPS instantaneous_frame_rates = [] # initialize variable for tracker use counter = 1 start_time = time.monotonic() t = [] # begin video stream internally ######vs = VideoStream(usePiCamera=True).start() vs = cv2.VideoCapture( '/home/pi/Desktop/object_detection/vision_system_multi_tracking_slow/test_video_11.mp4' ) # uncomment next two lines for exporting video # fourcc = cv2.VideoWriter_fourcc(*'XVID') #out = cv2.VideoWriter('output2.avi', fourcc, 30.0, (640,480)) # wait 1 second to give the camera time to adjust to lighting time.sleep(1.0) # main loop while True: #(vs.isOpened()): # calculating instantaneous FPS total_time = (time.monotonic() - start_time) start_time = time.monotonic() print("FPS: " + str(1 / (total_time))) # Keep track of loop number counter += 1 # get and resize current frame from camera ret, frame = vs.read() if ret == False: break frame = cv2.resize(frame, (input_width, input_height)) (H, W) = frame.shape[:2] # if no tracker exits if len(t) == 0: # formating the frame as an RGB image for the TensorFlow detector image_detector = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # get object detection results from TensorFlow Lite object detection model results = detect_objects(interpreter, image_detector, args.threshold) # get coordinates of bounding boxes rects = get_rects(results) # loops through results for i in np.arange(0, len(results)): #format bounding box coordinates for OpenCV tracker box = np.array(rects[i]) (startY, startX, endY, endX) = box.astype("int") cv_rect = (startX, startY, endX - startX, endY - startY) #Note on tracker types: #KCF: Average speed, Average accuracy #TLD: Average speed, works well is occlusion and scale changes #MOSSE: High speed, low accuracy #MedianFlow: High speed, good accuracy only on slow moving objects (current best) # initialize tracker #tracker = cv2.TrackerMedianFlow_create() #tracker.init(frame, cv_rect) #t.append(tracker) # draw bounding box from the detector on frame cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2) # return active objects from the centroid tracker objects = ct.update(rects) # display object centroid on screen for (objectID, centroid) in objects.items(): text = "ID {}".format(objectID) #annotator.text([centroid[0],centroid[1]], text) cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 255, 0)) cv2.circle(frame, (centroid[0], centroid[1]), 5, (0, 255, 0)) # if a tracker has already been set up else: for tracker in t: # update the tracker is new frame and get new results (success, box) = tracker.update(frame) # if tracker was successful if success: # draw bounding box; box format [xmin, ymin, width, height], cv2.rectangle format [xmin, ymin, xmax, ymax] cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])), (0, 
255, 0), 2) # update centroid tracker; centroid format [ymin, xmin, ymax, xmax] # TODO: Fix formatting! objects = ct.update([[ int(box[1]), int(box[0]), int(box[1] + box[3]), int(box[0] + box[2]) ]]) # draw centroid for (objectID, centroid) in objects.items(): text = "ID {}".format(objectID) #annotator.text([centroid[0],centroid[1]], text) cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 255, 0)) cv2.circle(frame, (centroid[0], centroid[1]), 5, (0, 255, 0)) # Every n frames the tracker will be erased and the object detector will run again to re-initialize the tracker # n=15 for MedianFlow if (counter % 15) == 0: t = [] # resize frame for display frame = cv2.resize(frame, (640, 480)) # uncomment next line to export video # out.write(frame) cv2.imshow("Frame", frame) key = cv2.waitKey(1) & 0xFF # key "q" quits main loop if key == ord("q"): break # once out of the main loop the program ends cv2.destroyAllWindows() #vs.stop vs.release()
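# Editor's sketch: the MedianFlow tracker initialisation above is commented out.
# With opencv-contrib-python installed it would typically be created as below;
# newer OpenCV builds move the constructor into cv2.legacy, hence the fallback.
# This helper is an assumption for illustration, not original code.
def make_medianflow_tracker():
    if hasattr(cv2, 'TrackerMedianFlow_create'):
        return cv2.TrackerMedianFlow_create()
    return cv2.legacy.TrackerMedianFlow_create()

# tracker = make_medianflow_tracker()
# tracker.init(frame, cv_rect)   # cv_rect = (x, y, w, h) from the detector
# t.append(tracker)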
def startStream(self, modeldir, graph, labels, threshold, resolution, edgetpu): MODEL_NAME = modeldir GRAPH_NAME = graph LABELMAP_NAME = labels min_conf_threshold = float(threshold) resW, resH = resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = edgetpu # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH), framerate=30).start() time.sleep(1) # Create window cv2.namedWindow('Object detector', cv2.WINDOW_NORMAL) #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): while True: # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. 
if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[ 0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[ 0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and (labels[int(classes[i])] == 'person')): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) # print(self.detect) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize( label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # Draw circle in center xcenter = xmin + (int(round((xmax - xmin) / 2))) ycenter = ymin + (int(round((ymax - ymin) / 2))) self.detect = setDetect(xcenter, ycenter, imH, imW) cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1) # Print info # print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')') # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # All the results have been drawn on the frame, so it's time to display it. cv2.imshow('Object detector', frame) # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / freq frame_rate_calc = 1 / time1 # Press 'q' to quit if cv2.waitKey(1) == ord('q'): break # Clean up cv2.destroyAllWindows() videostream.stop()
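# Editor's sketch: setDetect() is defined elsewhere in the original project. Based on
# how it is called above -- setDetect(xcenter, ycenter, imH, imW) -- one plausible,
# purely hypothetical implementation reports where the detected person sits relative
# to the frame centre, e.g. for a pan/tilt controller:
def setDetect(xcenter, ycenter, imH, imW):
    dx = xcenter - imW / 2      # positive -> person right of centre
    dy = ycenter - imH / 2      # positive -> person below centre
    return {'found': True, 'dx': dx, 'dy': dy}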
watermark_interpreter = Interpreter( model_path="converted_watermark_model.tflite") print("Loaded watermark interpreter") print("Loading UV interpreter") uv_interpreter = Interpreter(model_path="converted_uv_model.tflite") print("Loaded UV interpreter") classify_input_details = classify_interpreter.get_input_details() classify_output_details = classify_interpreter.get_output_details() watermark_input_details = watermark_interpreter.get_input_details() watermark_output_details = watermark_interpreter.get_output_details() uv_input_details = uv_interpreter.get_input_details() uv_output_details = uv_interpreter.get_output_details() print("Allocating classification model tensors...") classify_interpreter.allocate_tensors() print("Allocated classification model tensors...") print("Allocating watermark model tensors...") watermark_interpreter.allocate_tensors() print("Allocated watermark model tensors...") print("Allocating UV model tensors...") uv_interpreter.allocate_tensors() print("Allocated UV model tensors...") print("All models loaded") print("Loading labels...") currency_labels = get_labels('currency_class_indices.txt') uv_labels = get_labels('uv_class_indices.txt') watermark_labels = get_labels('watermark_class_indices.txt') print("Labels loaded")
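# Editor's sketch: get_labels() is defined elsewhere in the original project. Judging
# by the '*_class_indices.txt' file names, it presumably maps an output index to a
# class name; a minimal loader, assuming one 'index label' pair per line
# (e.g. '0 fifty_front'), could look like this:
def get_labels_sketch(path):
    labels = {}
    with open(path) as f:
        for line in f:
            idx, name = line.strip().split(maxsplit=1)
            labels[int(idx)] = name
    return labels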
class camera_interface(): """ The main interface for using the camera and determining the grip we need to be in. https://www.hackster.io/gatoninja236/scan-qr-codes-in-real-time-with-raspberry-pi-a5268b Attributes: count (int): Count of saved screenshots. File titles are frame'count'.jpg. cap (cv2 VideoCapture): The VideoCapture object. detector (QRCodeDetector): The QR Code detecting object. """ def __init__(self,resolution=(640,480),framerate=30): self.count = 0 # self.cap = cv2.VideoCapture(0) self.vs = VideoStream(resolution=(1280,720),framerate=30).start() # self.stream = cv2.VideoCapture(0) # ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) # ret = self.stream.set(3,resolution[0]) # ret = self.stream.set(4,resolution[1]) #Wait for the camera to startup for one seconds time.sleep(1) print("[INFO] Created video capture object") print("[INFO] loading model...") #Load the tflite model and labelmap # Get path to current working directory GRAPH_NAME = "detect.tflite" MODEL_NAME = "Camera_Interpreter/Coco" LABELMAP_NAME = "labelmap.txt" CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if self.labels[0] == '???': del(self.labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument use_TPU = False if use_TPU: self.interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: self.interpreter = Interpreter(model_path=PATH_TO_CKPT) self.interpreter.allocate_tensors() # Get model details self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) self.input_mean = 127.5 self.input_std = 127.5 # QR code detection object # self.detector = cv2.QRCodeDetector() self.cam_data = "" self.object_spotted = False self.test_count = 0 self.killed_thread = False self.cam_image = None self.cam_image_index = 0 self.object_spotted_T0 = 0 self.object_not_spotted_delta_req = 3 #Initialize the paused flag to false self.temp_pause = False def camera_read_threader(self): #Start the read cam thread read_cam = threading.Thread(target=self.read_cam_thread, args=()) read_cam.start() while(self.cam_image_index == 0): time.sleep(0.05) #Start the image decode thread decoder = threading.Thread(target=self.decode_image_thread, args=()) decoder.start() while not self.killed_thread and read_cam.is_alive() and decoder.is_alive(): time.sleep(0.25) #Flag is thrown or error, so ensure flag is thrown and wait for threads to join self.killed_thread = True read_cam.join() decoder.join() def decode_image_thread(self): previous_index = None while not self.killed_thread: #Detect and decode the stored image if it's ready # t = time.time() if(previous_index != self.cam_image_index and (not self.temp_pause)): previous_index = self.cam_image_index # data, _, _ = 
self.detector.detectAndDecode(self.cam_image) Deprecated QR Code reader data, score = self.detect_main_object(self.cam_image) # print("[INFO] Camera objects: " + data) # if(data not in grips._value2member_map_): # data = grips.openGrip.value #If the camera sees an object, skip the time requirement if(data != ""): self.cam_data = data self.object_spotted_T0 = time.time() self.object_spotted = True #If the camera doesn't see an object, require a delay before reporting nothing else: if((time.time() - self.object_spotted_T0) > self.object_not_spotted_delta_req): # print("[DEBUG] Delta Req passed; reporting no object now") self.cam_data = data self.object_spotted = False #####No sleep since detecting/decoding takes significant time, just do it as fast as possible # print("[INFO] Time to decode image: " + (str(time.time() - t))) time.sleep(0.01) def detect_main_object(self, frame1): min_conf_threshold = 0.35 # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (self.width, self.height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if self.floating_model: input_data = (np.float32(input_data) - self.input_mean) / self.input_std # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'],input_data) self.interpreter.invoke() # Retrieve detection results # boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects highest_scoring_label = "" highest_score = 0 for i in range(len(scores)): object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index if((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and (scores[i] > highest_score) and (object_name in grips._value2member_map_)): # Draw label highest_scoring_label = object_name highest_score = scores[i] return (highest_scoring_label, highest_score) def read_cam_thread(self): while not self.killed_thread: if(not self.temp_pause): # t = time.time() #Get camera image, rescale, and store in class variable frame = self.vs.read() self.cam_image = imutils.resize(frame, width=400) #Increase index by 1 self.cam_image_index += 1 #Pause temply time.sleep(0.2) # print("Time to save/resize new image: " + (str(time.time() - t))) # def read_cam(self): # # get the image # _, img = self.cap.read() #TODO: #14 Downscale the resolution for faster processing # # get bounding box coords and data # data, bbox, _ = self.detector.detectAndDecode(img) # #Define a parameter we can easily read later if anything is detected # is_object = False # #Update parameter/output the data we found, if any # if data: # #print("data found: ", data) # is_object = True # #return the information we got from the camera # # cv2.imwrite("frame1.jpg", img) # save frame as JPEG file # return data, bbox, img, is_object # def read_cam_display_out(self): # #Call the standard method to get the qr data / bounding box # data, bbox, img, _ = self.read_cam() # # if there is a bounding box, draw one, along with the data # if(bbox is not None): # for i in range(len(bbox)): # cv2.line(img, tuple(bbox[i][0]), 
tuple(bbox[(i+1) % len(bbox)][0]), color=(255, # 0, 255), thickness=2) # cv2.putText(img, data, (int(bbox[0][0][0]), int(bbox[0][0][1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0, 255, 0), 2) # #if data: # #print("data found: ", data) # # display the image preview # cv2.imshow("code detector", img) # # save the image # cv2.imwrite("frame1.jpg", img) # save frame as JPEG file # #self.count += 1 def end_camera_session(self): #Stop the camera thread self.killed_thread = True time.sleep(0.1) #Release the camera object self.vs.stop()
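# Editor's sketch of how the camera_interface class above is typically driven: the
# reader/decoder threads run in the background while the caller polls cam_data.
# The polling loop itself is illustrative, not part of the original class.
import threading
import time

cam = camera_interface()
worker = threading.Thread(target=cam.camera_read_threader, daemon=True)
worker.start()
try:
    for _ in range(20):                # poll for roughly ten seconds
        if cam.object_spotted:
            print('seen:', cam.cam_data)
        time.sleep(0.5)
finally:
    cam.end_camera_session()
    worker.join()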
class ImageProcessing: import numpy as np import cv2 import math from threading import Thread from multiprocessing import Process from threading import Timer import os def __init__(self, resolution, flag): self.height, self.width = resolution self.resolution = self.height, self.width self.index=0 self.mode = 0 self.flag = flag self.camera = self.cv2.VideoCapture(-1) self.camera.set(self.cv2.CAP_PROP_FRAME_HEIGHT, self.height) self.camera.set(self.cv2.CAP_PROP_FRAME_WIDTH, self.width) self._setROI() self._setCoral() self._getDashboard() self._getIpmMat() self._getMaskBG() def _setROI(self): self.ROI_W = int(self.width*0.2) self.ROI_H = int(self.height*0.35) self.ROI_far_pos = 0.65 # Set along your vehicle velocity and frane rate | Late: far, Fast: near self.ROI_far_rngH = slice(int(self.height*self.ROI_far_pos - self.ROI_H), int(self.height*self.ROI_far_pos)) self.ROI_near_rngH = slice(int(self.height - self.ROI_H), int(self.height)) self.ROI_rngW = slice(int(self.width/2 - self.ROI_W/2),int(self.width/2 + self.ROI_W/2)) self.ROI_lane_rngH = slice(int(self.height*0.7 - self.ROI_H*0.7), int(self.height*0.7)) self.ROI_lane_rngW = slice(int(self.width/2 - self.ROI_W*0.5),int(self.width/2 + self.ROI_W*0.5)) def _getIpmMat(self): # Your camera inner & external parameters alpha = (7-90)*self.np.pi/180 beta = 0 gamma = 0 dist = 300 focal = 500 # Calculating rotational transformation matrix A1 = self.np.array([[1,0,-self.width/2],[0,1,-self.height/2],[0,0,0],[0,0,1]],dtype='f') RX = self.np.array([[1,0,0,0],[0,self.math.cos(alpha), -self.math.sin(alpha),0],[0,self.math.sin(alpha),self.math.cos(alpha),0],[0,0,0,1]],dtype='f') RY = self.np.array([[self.math.cos(beta),0,-self.math.sin(beta),0],[0,1,0,0],[self.math.sin(beta),0,self.math.cos(beta),0],[0,0,0,1]],dtype='f') RZ = self.np.array([[self.math.cos(gamma),-self.math.sin(gamma),0,0],[self.math.sin(gamma),self.math.cos(gamma),0,0],[0,0,1,0],[0,0,0,1]],dtype='f') R = self.np.dot(RX,self.np.dot(RY,RZ)) T = self.np.array([[1,0,0,0],[0,1,0,0],[0,0,1,dist],[0,0,0,1]],dtype='f') K = self.np.array([[focal,0,self.width/2,0],[0,focal,self.height/2,0],[0,0,1,0]],dtype='f') self.IpmMat = self.np.dot(K,self.np.dot(T,self.np.dot(R,A1))) def _getMaskBG(self): # Mask for blank area when images were ipm mapped tmp_blank = self.np.full((self.height, self.width,3), 255, dtype='uint8') tmp_ipm = self.cv2.warpPerspective(tmp_blank, self.IpmMat, (self.width, self.height), flags=self.cv2.INTER_CUBIC|self.cv2.WARP_INVERSE_MAP) self.maskBG = self.cv2.bitwise_not(tmp_ipm) def _getDashboard(self): self.board_size = 320 size = self.board_size self.board = self.np.full((size*2, size*2,3), 255, dtype='uint8') tmp_stopline = self.cv2.imread('dashboard/stopline_red.jpg',self.cv2.IMREAD_COLOR) self.icon_stopline = self.cv2.resize(tmp_stopline, dsize=(size,size), interpolation=self.cv2.INTER_AREA) tmp_blindspot = self.cv2.imread('dashboard/blind_spot_red.jpg',self.cv2.IMREAD_COLOR) self.icon_blindspot = self.cv2.resize(tmp_blindspot, dsize=(size,size), interpolation=self.cv2.INTER_AREA) self.icon_blank = self.np.full((size,size*2,3),255,dtype='uint8') self.icon_schoolzone = self.np.full((size,size*2,3),0,dtype='uint8') self.icon_schoolzone[:,:,2] = 255 self.icon_subs = self.np.full((size,size,3),0,dtype='uint8') self.icon_subs[:,:,2] = 255 def processing(self): # Calibration parameters by experiments CamMat = self.np.array([[314.484, 0, 321.999],[0, 315.110, 259.722],[ 0, 0, 1]],dtype='f') DistMat = self.np.array([ -0.332015, 0.108453, 0.001100, 0.002183],dtype='f') # For inRange 
function in opencv # Modify value along your brightness condition lower_k = self.np.array([0,0,0]) upper_k = self.np.array([180,255,100]) lower_r1 = self.np.array([0,50,50]) upper_r1 = self.np.array([30,255,255]) lower_r2 = self.np.array([150,50,50]) upper_r2 = self.np.array([180,255,255]) ret, frame = self.camera.read(); del(ret) # Now take frame from camera calibration = self.cv2.undistort(frame, CamMat, DistMat, None, CamMat) # Calibration because of wide angle camera tmp_ipm1 = self.cv2.warpPerspective(calibration, self.IpmMat, (self.width,self.height), flags=self.cv2.INTER_CUBIC|self.cv2.WARP_INVERSE_MAP) # Geometrical transform image to Top view perspective tmp_ipm2 = self.cv2.add(tmp_ipm1, self.maskBG) # It just merges ipm image with white background ipm = self.cv2.bilateralFilter(tmp_ipm2,9,50,50) # Just Filter self.result = ipm.copy() hsv = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2HSV) gray = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2GRAY) #canny = self.cv2.Canny(gray, 100, 200, 3) # If you want to use canny edge algorithm, activate this line threshold_inv = self.cv2.adaptiveThreshold(gray, 255, self.cv2.ADAPTIVE_THRESH_MEAN_C, self.cv2.THRESH_BINARY, 21, 5) threshold = self.cv2.bitwise_not(threshold_inv) #mask_k = self.cv2.inRange(hsv, lower_k, upper_k) #mask_k = canny.copy() mask_k = threshold.copy() self.mask_k = mask_k[self.ROI_far_rngH, self.ROI_rngW]#[self.ROI_far_rngH, self.ROI_rngW] self.mask_lane = mask_k[self.ROI_lane_rngH,self.ROI_lane_rngW] # Now you can get red mask for schoolzone detecting mask_r1 = self.cv2.inRange(hsv, lower_r1, upper_r1) mask_r2 = self.cv2.inRange(hsv, lower_r2, upper_r2) mask_r = self.cv2.add(mask_r1, mask_r2) self.mask_r = mask_r[self.ROI_near_rngH, self.ROI_rngW] def detectingSchoolzone(self): # Just counting red dots if((self.np.sum(self.mask_r)/255) > ((self.ROI_H)*(self.ROI_W)*0.2)): self.flag.schoolzone = True else: self.flag.schoolzone = False def laneDetect(self): # By Jinwon, Lane detecting algorithm. # adaptive detecting method # It need to improve frame = self.cv2.flip(self.mask_lane.copy(),0) H,W = frame.shape[0:2] lane_base = self.np.array(range(0,W)) lane = self.np.full((H,1), int(W/2), dtype='uint32') laneL = self.np.full((H,1), int(W/2), dtype='uint32') laneR = self.np.full((H,1), int(W/2), dtype='uint32') num0 = self.np.sum(frame[0,:] != False) if(num0 != 0): lane[0] = int(self.np.sum(lane_base*frame[0,:])/(255*num0)) else: lane[0] = int(W/2) for j in range(1,H): rangeL = range(0, int(lane[j-1])) rangeR = range(int(lane[j-1]), int(W)) numL = self.np.sum(frame[j,rangeL] != False) numR = self.np.sum(frame[j,rangeR] != False) if(numL == 0)|(numR == 0): lane[j] = lane[j-1] else: laneL[j] = self.np.sum(lane_base[rangeL]*frame[j,rangeL])/(255*numL) laneR[j] = self.np.sum(lane_base[rangeR]*frame[j,rangeR])/(255*numR) lane[j] = (laneR[j] + laneL[j])/2 self.mask_lane[int(H - j),int(lane[j])] = 255 self.flag.lane_err = ((self.np.mean(lane)*2/W) -1) # Return method is various. 
It just return mean value def _setCoral(self, modeldir="Model"): # By github.com/EdjeElectronics & coral.ai CWD_PATH =self.os.getcwd() MODEL_NAME = modeldir GRAPH_NAME = "edgetpu.tflite" #If you don't have coral, "detect.tflite" LABELMAP_NAME = "labelmap.txt" # path to PATH_TO_CKPT = self.os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) PATH_TO_LABELS = self.os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.coral_labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if self.coral_labels[0] == '???': del(self.coral_labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument self.coral_interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) self.coral_interpreter.allocate_tensors() # Get model details self.coral_input_details = self.coral_interpreter.get_input_details() self.coral_output_details = self.coral_interpreter.get_output_details() self.coral_height = self.coral_input_details[0]['shape'][1] self.coral_width = self.coral_input_details[0]['shape'][2] self.coral_input_mean = 127.5 self.coral_input_std = 127.5 def detectingStopline(self): # By github.com/EdjeElectronics & coral.ai image = self.mask_k.copy() imH,imW = image.shape[0:2] # Get image & general coral_frame = self.cv2.cvtColor(image, self.cv2.COLOR_GRAY2RGB) frame_rgb = self.cv2.cvtColor(coral_frame, self.cv2.COLOR_BGR2RGB) frame_resized = self.cv2.resize(frame_rgb, (self.coral_width, self.coral_height)) input_data = self.np.expand_dims(frame_resized, axis=0) # Perform the actual detection by running the model with the image as input self.coral_interpreter.set_tensor(self.coral_input_details[0]['index'],input_data) self.coral_interpreter.invoke() # Retrieve detection results self.coral_boxes = self.coral_interpreter.get_tensor(self.coral_output_details[0]['index'])[0] # Bounding box coordinates of detected objects self.coral_classes = self.coral_interpreter.get_tensor(self.coral_output_details[1]['index'])[0] # Class index of detected objects self.coral_scores = self.coral_interpreter.get_tensor(self.coral_output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Threshold self.coral_min_conf_threshold = 0.90 self.flag.stopline = False # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(self.coral_scores)): if ((self.coral_scores[i] > self.coral_min_conf_threshold) and (self.coral_scores[i] <= 1.0)): if(self.coral_labels[int(self.coral_classes[i])] == "stopline"): self.flag.stopline = True # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(self.coral_boxes[i][0] * imH)) + self.ROI_far_rngH.start) xmin = int(max(1,(self.coral_boxes[i][1] * imW)) + self.ROI_rngW.start) ymax = int(min(imH,(self.coral_boxes[i][2] * imH)) + self.ROI_far_rngH.start) xmax = int(min(imW,(self.coral_boxes[i][3] * imW)) + self.ROI_rngW.start) self.cv2.rectangle(self.result, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2) # Draw label object_name = 
self.coral_labels[int(self.coral_classes[i])] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(self.coral_scores[i]*100)) # Example: 'person: 72%' labelSize, baseLine = self.cv2.getTextSize(label, self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window self.cv2.rectangle(self.result, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), self.cv2.FILLED) # Draw white box to put label text in self.cv2.putText(self.result, label, (xmin, label_ymin-7), self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # Thread def task(self): while(1): t1 = self.cv2.getTickCount() self.processing() self.laneDetect() self.detectingSchoolzone() self.flag.schoolzone = False #tmp if(self.mode == 1): self.detectingStopline() self.board[160:640,:,:] = self.result.copy() self.board[0:self.board_size,:,:] = self.icon_blank.copy() # Now, Mode Selector if(self.mode == 0): if(self.flag.schoolzone == True): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() self.mode = 1 elif(self.mode == 1): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() if(self.flag.stopline == True): self.board[0:self.board_size,0:self.board_size,:] = self.icon_stopline.copy() self.mode = 2 self.flag.stop = True elif(self.mode == 2): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() self.board[0:self.board_size,0:self.board_size,:] = self.icon_stopline.copy() if(self.flag.depart == True): self.flag.depart = False self.flag.powerHandle = True self.mode = 3 elif(self.mode == 3): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() if(self.flag.refresh == True): self.flag.refresh = False self.flag.lidar = True self.mode = 4 elif(self.mode == 4): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() if(self.flag.blindspot == True): self.mode = 5 self.flag.slow = True elif(self.mode == 5): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() self.board[0:self.board_size,self.board_size:(self.board_size*2),:] = self.icon_blindspot.copy() if(self.flag.blindspot == False): self.mode = 6 self.flag.slow = False elif(self.mode == 6): self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy() if(self.flag.schoolzone == False): self.board[0:self.board_size,:,:] = self.icon_blank.copy() self.mode = 7 self.cv2.imshow('Dashboard', self.board) t2 = self.cv2.getTickCount() freq = self.cv2.getTickFrequency() #print(freq/(t2-t1)) self.cv2.waitKey(1) if(self.mode == 7): self.flag.end = True break self.cv2.destroyAllWindows() def start(self): self.thread = self.Thread(target=self.task) self.thread.start() def startLane(self): self.threadLane = self.Thread(target=self.taskLane) self.threadLane.start()
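The ImageProcessing pipeline above is driven entirely through the shared flag object that its background thread mutates. Below is a minimal usage sketch, assuming a flag container that simply exposes the attributes task() and laneDetect() read and write, and assuming the Model/ directory and Edge TPU delegate that _setCoral() expects are available; the SimpleNamespace and the steer() call are illustrative placeholders, not part of this project.

from types import SimpleNamespace

# Assumed flag container: field names mirror the attributes task() touches.
flag = SimpleNamespace(
    schoolzone=False, stopline=False, stop=False, depart=False,
    powerHandle=False, refresh=False, lidar=False, blindspot=False,
    slow=False, end=False, lane_err=0.0)

# __init__ unpacks resolution as (height, width).
ip = ImageProcessing(resolution=(480, 640), flag=flag)
ip.start()                     # runs task() in a background thread

# The vehicle control loop could then poll the shared state, e.g.:
# while not flag.end:
#     steer(flag.lane_err)     # hypothetical motor-control call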
class PoseEngine(): """Engine used for pose tasks.""" def __init__(self, model_path, mirror=False): """Creates a PoseEngine with given model. Args: model_path: String, path to TF-Lite Flatbuffer file. mirror: Flip keypoints horizontally. Raises: ValueError: An error occurred when model output is invalid. """ edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB) posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB) self._interpreter = Interpreter( model_path, experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate]) self._interpreter.allocate_tensors() self._mirror = mirror self._input_tensor_shape = self.get_input_tensor_shape() if (self._input_tensor_shape.size != 4 or self._input_tensor_shape[3] != 3 or self._input_tensor_shape[0] != 1): raise ValueError( ('Image model should have input shape [1, height, width, 3]!' ' This model has {}.'.format(self._input_tensor_shape))) _, self._input_height, self._input_width, self._input_depth = self.get_input_tensor_shape() self._input_type = self._interpreter.get_input_details()[0]['dtype'] self._inf_time = 0 def run_inference(self, input_data): """Run inference using the zero copy feature from pycoral and returns inference time in ms. """ start = time.monotonic() edgetpu.run_inference(self._interpreter, input_data) self._inf_time = time.monotonic() - start return (self._inf_time * 1000) def DetectPosesInImage(self, img): """Detects poses in a given image. For ideal results make sure the image fed to this function is close to the expected input size - it is the caller's responsibility to resize the image accordingly. Args: img: numpy array containing image """ input_details = self._interpreter.get_input_details() image_width, image_height = img.size resized_image = img.resize( (self._input_width, self._input_height), Image.NEAREST) input_data = np.expand_dims(resized_image, axis=0) if self._input_type is np.float32: # Floating point versions of posenet take image data in [-1,1] range. input_data = np.float32(resized_image) / 128.0 - 1.0 else: # Assuming to be uint8 input_data = np.asarray(resized_image) self.run_inference(input_data.flatten()) return self.ParseOutput() def get_input_tensor_shape(self): """Returns input tensor shape.""" return self._interpreter.get_input_details()[0]['shape'] def get_output_tensor(self, idx): """Returns output tensor view.""" return np.squeeze(self._interpreter.tensor( self._interpreter.get_output_details()[idx]['index'])()) def ParseOutput(self): """Parses interpreter output tensors and returns decoded poses.""" keypoints = self.get_output_tensor(0) keypoint_scores = self.get_output_tensor(1) pose_scores = self.get_output_tensor(2) num_poses = self.get_output_tensor(3) poses = [] for i in range(int(num_poses)): pose_score = pose_scores[i] pose_keypoints = {} for j, point in enumerate(keypoints[i]): y, x = point if self._mirror: y = self._input_width - y pose_keypoints[KeypointType(j)] = Keypoint( Point(x, y), keypoint_scores[i, j]) poses.append(Pose(pose_keypoints, pose_score)) return poses, self._inf_time
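A hedged usage sketch for PoseEngine follows. The model filename and the 0.4 score cut-off are placeholders, and the pose.keypoints / pose.score / keypoint.point attribute names are assumed to match the Pose and Keypoint containers used by the Coral posenet examples; note that DetectPosesInImage returns the inference time in seconds.

from PIL import Image

# Placeholder model path; use the posenet decoder model matching your setup.
engine = PoseEngine(
    'posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')

img = Image.open('frame.jpg').convert('RGB')   # ideally close to the input size
poses, inf_time = engine.DetectPosesInImage(img)

for pose in poses:
    if pose.score < 0.4:                       # assumed threshold, tune as needed
        continue
    for keypoint_type, keypoint in pose.keypoints.items():
        print(keypoint_type, keypoint.point, keypoint.score)

print('inference: %.1f ms' % (inf_time * 1000))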
# Load the label map; the first label can be '???' (COCO starter model), in which case it has to be removed
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
    del (labels[0])

# When using the Edge TPU, a special load_delegate argument is required
if use_TPU:
    interpreter = Interpreter(
        model_path=PATH_TO_CKPT,
        experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

# Load the model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

# Declare variables for the frame-rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()

# Now create the video stream
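Before the stream loop, each captured frame is resized and fed to the interpreter. A sketch of that per-frame step, consuming the variables defined above, is shown here; frame_resized is assumed to be an HxWx3 uint8 array already resized to (width, height) with cv2.resize.

input_data = np.expand_dims(frame_resized, axis=0)

# Quantized models take uint8 directly; float models expect values
# normalized to roughly [-1, 1] via input_mean / input_std.
if floating_model:
    input_data = (np.float32(input_data) - input_mean) / input_std

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # bounding boxes
classes = interpreter.get_tensor(output_details[1]['index'])[0]  # class indices
scores = interpreter.get_tensor(output_details[2]['index'])[0]   # confidences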
def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( '--model', help='File path of .tflite file.', required=True) parser.add_argument( '--labels', help='File path of labels file.', required=True) parser.add_argument( '--threshold', help='Score threshold for detected objects.', required=False, type=float, default=0.4) args = parser.parse_args() labels = load_labels(args.labels) interpreter = Interpreter(args.model, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) #coral interpreter.allocate_tensors() _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape'] # initialize variables to calculate FPS instantaneous_frame_rates = [] # initialize variable for tracker use #trackers = [] #j=0 counter = 0 t = None #win = dlib.image_window() test_start_time = time.monotonic() with picamera.PiCamera( resolution=(CAMERA_WIDTH, CAMERA_HEIGHT), framerate=30) as camera: camera.start_preview() #alpha = 200 start_time = time.monotonic() try: stream = io.BytesIO() annotator = Annotator(camera) for _ in camera.capture_continuous( stream, format='jpeg', use_video_port=True): #start_time = time.monotonic() stream.seek(0) print("Test FPS: " + str(1/(time.monotonic() - test_start_time))) counter += 1 #start_time = time.monotonic() #start_time declaration moved to give a more accurate measurement to calculate FPS image = Image.open(stream).convert('RGB') dlib_img = np.asarray(image) #image.save("test_save.jpg") #image = Image.open(stream).convert('RGB').resize((input_width, input_height), Image.ANTIALIAS) #image = image.resize((input_width, input_height), Image.ANTIALIAS) #dlib_img = dlib.load_rgb_image("/home/pi/Desktop/object_detection/object_detection_tpu_tracking_dlib/test_save.jpg") annotator.clear() # if there are no trackes, first must try to detect objects #if len(trackers) == 0: if t == None: #dlib_img = np.asarray(image) image = image.resize((input_width, input_height), Image.ANTIALIAS) results = detect_objects(interpreter, image, args.threshold) # get the coordinates for all bounding boxes within frame rects = get_rects(results) for i in np.arange(0,len(results)): #format bounding box coordinates box = np.array(rects[i]) (startY, startX, endY, endX) = box.astype("int") print(startX, startY, endX, endY) #x = (startX + endX) / 2 #y = (startY + endY) / 2 dlib_rect = dlib.rectangle(startX, startY, endX, endY) t = dlib.correlation_tracker() t.start_track(dlib_img, dlib_rect) #trackers.append(t) #annotator.clear() #annotator.centroid(x, y) #annotate_objects(annotator, results, labels) else: t.update(dlib_img) pos = t.get_position() startX = int(pos.left()) startY = int(pos.top()) endX = int(pos.right()) endY = int(pos.bottom()) x = (startX + endX) / 2 y = (startY + endY) / 2 #annotator.centroid(x, y) #annotator.clear() annotator.bounding_box([startX, startY, endX, endY]) #if (counter % 20) == 0: #t = None elapsed_ms = (time.monotonic() - start_time) * 1000 annotator.text([5, 0], '%.1f ms' % (elapsed_ms)) frame_rate = 1/ ((time.monotonic() - start_time)) start_time = time.monotonic() print(frame_rate) #calculate average FPS instantaneous_frame_rates.append(frame_rate) avg_frame_rate = sum(instantaneous_frame_rates)/len(instantaneous_frame_rates) print("FPS: " + str(avg_frame_rate)) #annotator.text([5, 15], '%.1f FPS' % (avg_frame_rate)) #annotator.clear() annotator.update() stream.seek(0) stream.truncate() test_start_time = time.monotonic() #print(time.monotonic()) finally: camera.stop_preview()
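The commented-out "if (counter % 20) == 0: t = None" line above hints at periodically dropping the correlation tracker so the TFLite detector runs again. A minimal sketch of that pattern, with REDETECT_EVERY as an assumed constant rather than a value from this project:

REDETECT_EVERY = 20   # assumed: frames between full TFLite detections

def maybe_reset_tracker(tracker, frame_counter):
    """Return None every REDETECT_EVERY frames so the next loop iteration
    falls back to detect_objects(); otherwise keep the current tracker."""
    if tracker is not None and frame_counter % REDETECT_EVERY == 0:
        return None
    return tracker

# Inside the capture loop, after t.update(dlib_img) and annotator.update():
# t = maybe_reset_tracker(t, counter)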