def detect_objects(threshold=0.1, top_count=3):
    interpreter = common.make_interpreter(default_model)
    interpreter.allocate_tensors()
    labels = load_labels(default_labels)

    cap = cv2.VideoCapture(default_camera_idx)
    objs = []  # initialised so the return below cannot raise NameError if the camera yields no frames
    if cap.isOpened():
        for i in range(0, 15):
            ret, frame = cap.read()
            time.sleep(1 / 1000)
            if not ret:
                break
            cv2_im = frame
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter, score_threshold=threshold, top_k=top_count)
            cv2_im = append_objs_to_img(cv2_im, objs, labels)
            cv2.imshow('detect', cv2_im)
            cv2.waitKey(50)

    def make(obj):
        return Result(
            percent=int(100 * obj.score),
            label=labels.get(obj.id, 'unknown'))

    cap.release()
    return [make(obj) for obj in objs]
def user_callback(input_tensor, src_size, inference_box, mot_tracker):
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(interpreter, input_tensor)
    interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    objs = get_output(interpreter, args.threshold, args.top_k)
    end_time = time.monotonic()

    detections = []  # np.array([])
    for n in range(0, len(objs)):
        element = []  # np.array([])
        element.append(objs[n].bbox.xmin)
        element.append(objs[n].bbox.ymin)
        element.append(objs[n].bbox.xmax)
        element.append(objs[n].bbox.ymax)
        element.append(objs[n].score)
        # print('element= ', element)
        detections.append(element)
        # print('dets: ', dets)
    # convert to numpy array
    detections = np.array(detections)
    # print('npdets: ', detections)

    trdata = []
    trackerFlag = False
    if detections.any():
        if mot_tracker != None:
            trdata = mot_tracker.update(detections)
            trackerFlag = True

    text_lines = [
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    if len(objs) != 0:
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines, trdata, trackerFlag)
def detect_objects(args):
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    dirname = args.images
    dirpath = Path('results/' + dirname)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    Path("results/" + dirname).mkdir(parents=True, exist_ok=True)

    for filename in glob.glob(dirname + "/*.jpeg"):
        print(filename)
        name = os.path.basename(filename)
        pil_im = Image.open(filename)
        open_cv_image = np.array(pil_im)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        #print(objs)
        open_cv_image = append_objs_to_img(open_cv_image, objs, labels)
        cv2_im_rgb = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)
        (flag, encodedImage) = cv2.imencode(".jpeg", cv2_im_rgb)
        #print(flag)
        #print(encodedImage)
        f = open("./results/" + dirname + "/" + name, "wb")
        f.write(encodedImage)
        f.close()
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()

    # Run hand detection
    common.set_input(detection_interpreter, input_tensor)
    detection_interpreter.invoke()
    detection_results = get_detection_output(detection_interpreter)

    # Resize image and set as input
    buf = input_tensor
    _, map_info = buf.map(Gst.MapFlags.READ)
    np_input = np.ndarray(shape=(h, w, 3), dtype=np.uint8, buffer=map_info.data)
    pil_input = Image.fromarray(np_input)
    pil_input = pil_input.resize((224, 224), Image.NEAREST)
    np_input = np.asarray(pil_input)
    common.input_tensor(classification_interpreter)[:, :] = np_input

    # Run hand classification
    classification_interpreter.invoke()
    classification_results = get_classification_output(classification_interpreter)

    end_time = time.monotonic()
    if show_display:
        return generate_svg(src_size, detection_results, classification_results)
    return
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model', help='File path of .tflite model.',
        default='inception_v4_299_quant_edgetpu.tflite')
    parser.add_argument(
        '--labels', help='File path of labels file.',
        default='imagenet_labels.txt')
    parser.add_argument(
        '--top_k', help='Number of classifications to list',
        type=int, default=1)
    args = parser.parse_args()

    print('Initializing TF Lite interpreter...')
    interpreter = common.make_interpreter(os.path.join(default_model_dir, args.model))
    interpreter.allocate_tensors()
    labels = load_labels(os.path.join(default_model_dir, args.labels))

    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:  # stop if the camera fails to deliver a frame
            break
        cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        results = classify_image(interpreter, pil_im, args.top_k)
        for label_id, prob in results:
            cv2.putText(frame, labels[label_id], (5, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, .7, (0, 0, 0), 2)
            print('%s: %.5f' % (labels[label_id], prob))
        cv2.imshow('Classification', frame)
        if cv2.waitKey(1) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def get_frame(self):
    if self.video.isOpened():
        ret, frame = self.video.read()
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(self.interpreter, pil_im)
        self.interpreter.invoke()
        objs = get_output(self.interpreter, score_threshold=threshold, top_k=top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, self.labels)
        # cv2.imshow('frame', cv2_im)
        for result in objs:
            label = '{:.0f}% {}'.format(100 * result.score,
                                        self.labels.get(result.id, result.id))
            if self.labels.get(result.id) == "person" and result.score > 0.6:
                self.file.write("1")
                self.file.seek(0)
            else:
                self.file.write("0")
                self.file.seek(0)
            print(label)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            return
        sleep(0.2)
        return frame
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument(
        '--top_k', type=int, default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #cv2.imshow('frame', cv2_im)
        #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        #pil_im = Image.fromarray(cv2_im_rgb)
        #handle_image_conversion(pil_im)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def start_detector(args, interpreter, labels, camera_res):
    """Detect max_objs objects from camera frames."""
    detected_objects.clear()
    try:
        cap = cv2.VideoCapture(args.camera_idx)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            cv2_im = frame
            cv2_im_u = cv2.undistort(cv2_im, common.CAMERA_MATRIX, common.DIST_COEFFS)
            cv2_im_u_rgb = cv2.cvtColor(cv2_im_u, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_u_rgb)
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = common.get_output(interpreter, score_threshold=args.threshold, labels=labels)

            # Reject images with number of detected objects > max_objs.
            if len(objs) > args.max_objs:
                continue

            # Create proto buffer message and add to stack.
            for obj in objs:
                detected_object = detection_server_pb2.DetectedObject(
                    label=obj.label,
                    score=obj.score,
                    area=obj.area,
                    centroid=detection_server_pb2.DetectedObject.Centroid(
                        x=obj.centroid.x, y=obj.centroid.y),
                    bbox=detection_server_pb2.DetectedObject.BBox(
                        xmin=obj.bbox.xmin, ymin=obj.bbox.ymin,
                        xmax=obj.bbox.xmax, ymax=obj.bbox.ymax))
                detected_objects.appendleft(detected_object)

            if args.display:
                cv2_im_u = common.annotate_image(objs, camera_res, cv2_im_u)
                cv2.imshow('frame', cv2_im_u)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    except cv2.error as e:
        print('cv2 error: {}'.format(e))
    finally:
        cap.release()
        cv2.destroyAllWindows()
    return
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(face_interpreter, input_tensor)
    face_interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    objs = get_output(face_interpreter, args.threshold, args.top_k)

    # Get face detected part
    from PIL import Image
    im = Image.fromarray(common.input_tensor(face_interpreter))
    src_w, src_h = src_size
    inf_w, inf_h = inference_size
    results = []
    emo_objs = []
    for obj in objs:
        x0, y0, x1, y1 = list(obj.bbox)  # Relative coordinates.
        x, y, w, h = x0, y0, x1 - x0, y1 - y0
        # Absolute coordinates, input tensor space.
        x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(h * inf_h)
        crop_rectangle = (x, y, x + w, y + h)
        # get face
        face = im.crop(crop_rectangle)
        face = np.array(face)
        # convert to grayscale
        #face = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)
        print(face.shape)
        face = cv2.resize(face, (224, 224))
        face = face.astype(np.uint8)
        #face /= float(face.max())
        face = np.reshape(face.flatten(), (224, 224, 3))

        # invoke fer interpreter
        common.set_input2(fer_interpreter, face)
        fer_interpreter.invoke()
        # process results
        results = get_emotion(fer_interpreter)
        if len(results) > 0:
            setattr(obj, "id", results[0].id)
            setattr(obj, "score", results[0].score)
            emo_objs.append(obj)
    objs = emo_objs
    end_time = time.monotonic()

    text_lines = []
    if len(objs) > 0:
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        for result in results:
            text_lines.append('score={:.2f}: {}'.format(
                result.score, labels.get(result.id, result.id)))
        #print(' '.join(text_lines))
    return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
def detect(self, duration_sec):
    if self.error:
        return False
    human_detected = False
    labels_path = os.path.join(self.model_directory, self.labels_file)
    labels = self.load_labels(labels_path)
    utc_timestamp = round(datetime.datetime.now().replace(
        tzinfo=datetime.timezone.utc).timestamp())
    video_name = os.path.join(
        self.log_directory, 'iot-hub-detect-' + str(utc_timestamp) + '.mp4')
    expire_time = utc_timestamp + duration_sec
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(video_name, fourcc, 24.0, (640, 480))
    print('Monitoring: ' + self.camera_stream_url + ' for ' + str(duration_sec) + ' seconds...')

    while utc_timestamp < expire_time or out != None:
        ret, cv2_im = self.stream.read()
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(self.interpreter, pil_im)
        self.interpreter.invoke()
        objs = self.get_output(self.interpreter,
                               score_threshold=self.score_threshold,
                               top_k=self.top_k)
        person_detected, cv2_im = self.append_objs_to_img(cv2_im, objs, labels)

        if person_detected:
            if person_detected != human_detected:
                print('HUMAN DETECTED @ ' + str(utc_timestamp))
                human_detected = True
            if out != None:
                if expire_time <= utc_timestamp:
                    print('Finished writing ' + video_name)
                    out.release()
                    out = None
                    break
                else:
                    out.write(cv2_im)
        else:
            break

        cv2.imshow(self.camera_stream_url, cv2_im)
        utc_timestamp = round(datetime.datetime.now().replace(
            tzinfo=datetime.timezone.utc).timestamp())
        if cv2.waitKey(1) & 0xFF == ord('q'):
            if out != None:
                out.release()
                out = None
            break

    self.stream.release()
    cv2.destroyAllWindows()
    return human_detected
def user_callback(input_tensor, src_size, inference_box):
    global access
    global house
    global parcel
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(interpreter, input_tensor)
    interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    results = get_output(interpreter, args.top_k, args.threshold)
    end_time = time.monotonic()
    text_lines = [
        ' ',
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    for result in results:
        text_lines.append('score={:.2f}: {}'.format(
            result.score, labels.get(result.id, result.id)))
        if gpio6.read() == True:
            access = 2
            Gtk.main_quit()
        elif house:
            if labels.get(result.id, result.id) == "tree frog, tree-frog" and result.score > 0.3:
                access = 1
                Gtk.main_quit()
            elif (labels.get(result.id, result.id) == "acoustic guitar"
                  or labels.get(result.id, result.id) == "jigsaw puzzle"
                  or labels.get(result.id, result.id) == "jellyfish"
                  or labels.get(result.id, result.id) == "basketball"
                  or labels.get(result.id, result.id) == "soccer ball") and result.score > 0.3:
                access = 0
                Gtk.main_quit()
        elif parcel:
            if labels.get(result.id, result.id) == "acoustic guitar" and result.score > 0.3:
                access = 1
                Gtk.main_quit()
            elif (labels.get(result.id, result.id) == "tree frog, tree-frog"
                  or labels.get(result.id, result.id) == "jigsaw puzzle"
                  or labels.get(result.id, result.id) == "jellyfish"
                  or labels.get(result.id, result.id) == "basketball"
                  or labels.get(result.id, result.id) == "soccer ball") and result.score > 0.3:
                access = 0
                Gtk.main_quit()
    print(' '.join(text_lines))
    return generate_svg(src_size, text_lines)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=str,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # imagezmq receiver; REQ_REP=False: use PUB/SUB (non-blocking)
    image_hub = imagezmq.ImageHub(open_port='tcp://147.47.200.65:35556', REQ_REP=False)
    #cap = cv2.VideoCapture(args.camera_idx)

    while True:
        # receive from zmq
        timestamp, frame = image_hub.recv_image()
        dt = datetime.fromtimestamp(timestamp)
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        start = time.monotonic()
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        inference_time = time.monotonic() - start
        inference_time = 'Inference time: %.2f ms (%.2f fps)' % (inference_time * 1000, 1.0 / inference_time)

        cv2_im = append_objs_to_img(cv2_im, objs, labels, inference_time, dt)
        #cv2_im = cv2.resize(cv2_im, (720, 720))
        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
        cv2.imshow("frame", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
def detect(frame, interpreter, labels, threshold, k):
    """
    Detects objects in each frame.
    Returns the frame and a list of detected objects (bounding boxes).
    """
    cv2_im = frame
    cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    common.set_input(interpreter, pil_im)
    interpreter.invoke()
    objs = get_output(interpreter, score_threshold=threshold, top_k=k)
    cv2_im = append_objs_to_img(cv2_im, objs, labels)
    return cv2_im, objs
def detect(frame, interpreter, labels, threshold, k):
    """
    Detects objects in each frame using an interpreter engine.
    Returns the edited frame with bounding boxes added and a list of objects detected.
    """
    cv2_im = frame
    cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    common.set_input(interpreter, pil_im)
    interpreter.invoke()
    objs = get_output(interpreter, score_threshold=threshold, top_k=k)
    cv2_im = append_objs_to_img(cv2_im, objs, labels)
    return cv2_im, objs
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(interpreter, input_tensor)
    interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    objs = get_output(interpreter, args.threshold, args.top_k)
    end_time = time.monotonic()
    text_lines = [
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    print(' '.join(text_lines))
    return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
def gen_frames():  # generate frame by frame from camera
    # Current regression model is unstable, so we take running averages of sin, cos,
    # and angle to stabilize the values
    moving_window = 10
    runsin = np.zeros(moving_window)
    runcos = np.zeros(moving_window)
    runtheta = np.zeros(moving_window)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        # Uncomment the following if you need to use bounding boxes or classification
        # schemes and to use it for overlay
        #objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        #cv2_im = append_objs_to_img(cv2_im, objs, labels)

        sincos = common.output_tensor(interpreter, 0)
        runsin = np.roll(runsin, 1)
        runsin[0] = sincos[0]
        runcos = np.roll(runcos, 1)
        runcos[0] = sincos[1]
        runtheta = np.roll(runtheta, 1)
        runtheta[0] = 180 / np.pi * np.arctan2(sincos[0], sincos[1])

        cv2_im = cv2.putText(cv2_im, 'angle: {}'.format(np.average(runtheta)),
                             (0, 480 - 90), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
        cv2_im = cv2.putText(cv2_im, 'sin: {}'.format(np.average(runsin)),
                             (0, 480 - 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
        cv2_im = cv2.putText(cv2_im, 'cos: {}'.format(np.average(runcos)),
                             (0, 480 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
        sd.putNumber(
            "WOF Angle",
            180 / np.pi * np.arctan2(np.average(runsin), np.average(runcos)))

        ret, buffer = cv2.imencode('.jpg', frame)
        frame = buffer.tobytes()
        # concat frame one by one and show result
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def get_features(interpreter, patches):
    # patches is a list of ndarray objects; either convert them into a tensor of
    # equal dimensions or feed them in one image at a time
    features = []
    for patch in patches:
        if 0 in patch.shape:
            features.append(None)
            continue
        common.set_input(interpreter, Image.fromarray(patch))
        interpreter.invoke()
        feature = common.output_tensor(interpreter, 0)
        features.append(feature)
    return features
def detect_object(args):
    global outputFrame, lock
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    else:
        if args.netsrc == None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        if args.displayBool == 'True':
            cv2.imshow('frame', cv2_im)

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = cv2_im.copy()

        if (time.time() - last_save) >= 1:
            take_snapshot(snapshot_im, objs, labels,
                          exclude=args.exclude.split(','),
                          include=args.include.split(','))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(interpreter, input_tensor)
    interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    results = get_output(interpreter, args.top_k, args.threshold)
    # print(src_size, results, input_tensor)
    end_time = time.monotonic()
    text_lines = [
        ' ',
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    for result in results:
        text_lines.append('score={:.2f}: {}'.format(
            result.score, labels.get(result.id, result.id)))
    print(' '.join(text_lines))
    return generate_svg(src_size, text_lines)
def main():
    if edgetpu == 1:
        mdl = model_edgetpu
    else:
        mdl = model
    # pass the selected model (mdl) rather than model_edgetpu unconditionally
    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1
    while True:
        start_time = time.time()
        #----------------Capture Camera Frame-----------------
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        #-------------------Inference---------------------------------
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter, score_threshold=threshold, top_k=top_k)
        #-----------------other------------------------------------
        track_object(objs, labels)  # tracking <<<<<<<
        fps = round(1.0 / (time.time() - start_time), 1)
        print("*********FPS: ", fps, "************")
        #-----------------------------------------------------
    cap.release()
    cv2.destroyAllWindows()
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    common.set_input(interpreter, input_tensor)
    interpreter.invoke()
    # For larger input image sizes, use the edgetpu.classification.engine for better performance
    objs = get_output(interpreter, args.threshold, args.top_k)

    # Get face detected part
    from PIL import Image
    im = Image.fromarray(common.input_tensor(interpreter))
    src_w, src_h = src_size
    inf_w, inf_h = inference_size
    results = []
    for obj in objs:
        x0, y0, x1, y1 = list(obj.bbox)  # Relative coordinates.
        x, y, w, h = x0, y0, x1 - x0, y1 - y0
        # Absolute coordinates, input tensor space.
        x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(h * inf_h)
        crop_rectangle = (x, y, x + w, y + h)
        face_part = im.crop(crop_rectangle)
        # invoke fer interpreter
        common.set_input2(interpreter_fer, face_part)
        interpreter_fer.invoke()
        results = get_output2(interpreter_fer, args.top_k, args.threshold)
        if len(results) > 0:
            setattr(obj, "id", results[0].id)
            setattr(obj, "score", results[0].score)

    end_time = time.monotonic()
    text_lines = [
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    for result in results:
        text_lines.append('score={:.2f}: {}'.format(
            result.score, labels.get(result.id, result.id)))
    print(' '.join(text_lines))
    return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
def classify(model_type=ModelType.General, top_k=1):
    interpreter = common.make_interpreter(model_type.model_path())
    interpreter.allocate_tensors()
    labels = load_labels(model_type.label_path())

    cap = cv2.VideoCapture(0)
    results = []  # initialised so the return below cannot raise NameError if the camera yields no frames
    if cap.isOpened():
        for i in range(0, 15):
            ret, frame = cap.read()
            time.sleep(1 / 1000)
            if not ret:
                break
            cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            common.set_input(interpreter, pil_im)
            results = classify_image(interpreter, pil_im, top_k)
            for label_id, prob in results:
                cv2.putText(cv2_im_rgb, labels[label_id], (5, 35),
                            cv2.FONT_HERSHEY_SIMPLEX, .7, (0, 0, 0), 2)
                print('%s: %.5f' % (labels[label_id], prob))
            cv2.imshow('Classification', cv2_im_rgb)
            cv2.waitKey(50)

    def make(obj):
        fs = "{0}({1})"
        parsed = parse.parse(fs, labels[obj[0]])
        if parsed is not None and len(parsed.fixed) > 1:
            tLabel = parsed[1]
        else:
            tLabel = labels[obj[0]]
        return Result(
            label=tLabel,
            percent=int(100 * obj[1]))

    cap.release()
    return [make(obj) for obj in results]
def main():
    #default_model_dir = './all_models'
    # Set face detection model
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'  # Coral ver
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite'  # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)

    # Set mask classification model
    default_model2 = 'mask_detector_quant.tflite'  # GPU ver
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'  # Coral ver
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    parser.add_argument('--labels', help='label file path', default=default_labels)
    #parser.add_argument('--top_k', type=int, default=3,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default=0)
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()
    # Load 2NN
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()
    # Load labels
    labels = load_labels(args.labels)

    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    filenames = os.listdir(test_dir)
    full_filenames = []
    for filename in filenames:
        full_filename = os.path.join(test_dir, filename)
        full_filenames.append(full_filename)

    total_facedetection_time = 0
    face_detection_count = 0
    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        #print('---------------------------', filename, '---------------------------')
        # get filenum
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]
        # set root from xml
        tree = ET.parse(filename)
        root = tree.getroot()
        # find img directory
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)
        # Load Image, get height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Get ground truths
        all = root.findall('object')
        ground_truths = []
        for object in all:
            # get name, bndbox for labels and bbox
            name = object.find('name')
            bndbox = object.find('bndbox')
            # set test label to name.text (mask or nomask)
            test_label = name.text
            bbox = []
            for element in bndbox:
                bbox.append(int(element.text))
            xmin, ymin, xmax, ymax = bbox
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (0, 0, 255)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)
            test_bbox = [bbox[0] / width, bbox[1] / height, bbox[2] / width, bbox[3] / height]
            ground_truths.append([test_label, test_bbox])
        #print('ground_truths: ', ground_truths)

        for ground_truth in ground_truths:
            with open("./mAP/groundtruths/{}.txt".format(filenum), "a+") as file:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)

        # Latency calculation
        detect_start_time = time.time()
        interpreter.invoke()
        detect_end_time = time.time()
        total_facedetection_time += detect_end_time - detect_start_time
        face_detection_count += 1

        objs = get_output(interpreter)
        #score_threshold=args.threshold, top_k=args.top_k)
        #print('detection result:', objs)

        for i in range(len(objs)):
            if objs[i].id != 0:
                continue
            if objs[i].score > 1:
                continue
            obj_bbox = list(objs[i].bbox)
            if any(edge > 1 for edge in obj_bbox):
                continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = int(xmin * width), int(ymin * height), int(xmax * width), int(ymax * height)
            unnorm = [xmin, ymin, xmax, ymax]
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (255, 0, 0)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)

            pil_im2 = Image.fromarray(cv2_im_rgb[ymin:ymax, xmin:xmax])
            common.set_input2(interpreter2, pil_im2)

            # Latency calculation
            mask_start_time = time.time()
            interpreter2.invoke()
            mask_end_time = time.time()
            output_data = common.output_tensor2(interpreter2)
            total_maskdetection_time += mask_end_time - mask_start_time
            mask_detection_count += 1
            # print(output_data)

            mask = output_data[0]
            withoutMask = output_data[1]
            print('mask_percentage: ', mask, ', nomask_percentage: ', withoutMask)
            if mask > withoutMask:
                label = "mask"
                score = mask * objs[i].score
            else:
                label = "nomask"
                score = withoutMask * objs[i].score
            #print(obj_bbox, label, score)

            with open("./mAP/2NN_CPU_8bit_detections/{}.txt".format(filenum), "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

        #window_name = 'Image'
        #cv2.imshow(window_name, cv2_im)
        #cv2.waitKey()
        #print('-------------------------------next file-------------------------------')

    avg_face = total_facedetection_time / face_detection_count
    avg_mask = total_maskdetection_time / mask_detection_count
    print('Average Face Detection Time: ', avg_face)
    print('Average Mask Detection Time: ', avg_mask)
    print('Average Total Inference Time: ', avg_face + avg_mask)
def main():
    if edgetpu == 1:
        mdl = model_edgetpu
    else:
        mdl = model
    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1
    arr_dur = [0, 0, 0]
    #while cap.isOpened():
    while True:
        start_time = time.time()

        #----------------Capture Camera Frame-----------------
        start_t0 = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        arr_dur[0] = time.time() - start_t0
        #cm.time_elapsed(start_t0, "camera capture")
        #----------------------------------------------------

        #-------------------Inference---------------------------------
        start_t1 = time.time()
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter, score_threshold=threshold, top_k=top_k)
        arr_dur[1] = time.time() - start_t1
        #cm.time_elapsed(start_t1, "inference")
        #----------------------------------------------------

        #-----------------other------------------------------------
        start_t2 = time.time()
        track_object(objs, labels)  # tracking <<<<<<<
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        cv2_im = append_text_img1(cv2_im, objs, labels, arr_dur, arr_track_data)
        ret, jpeg = cv2.imencode('.jpg', cv2_im)
        pic = jpeg.tobytes()
        # Flask streaming
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + pic + b'\r\n\r\n')
        arr_dur[2] = time.time() - start_t2

        fps = round(1.0 / (time.time() - start_time), 1)
        print("*********FPS: ", fps, "************")

    cap.release()
    cv2.destroyAllWindows()
def main():
    #default_model_dir = './all_models'
    # Set model
    # default_model = './1NN/quantized/one_nn11_edgetpu.tflite'  # Coral ver
    default_model = './1NN/quantized/one_nn_det_100_3.tflite'  # GPU ver
    default_labels = 'face_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)
    parser.add_argument('--labels', help='label file path', default=default_labels)
    #parser.add_argument('--top_k', type=int, default=5,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN
    interpreter = tflite.Interpreter(model_path=args.model)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.allocate_tensors()
    # Load labels
    labels = load_labels(args.labels)

    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    filenames = os.listdir(test_dir)
    full_filenames = []
    for filename in filenames:
        full_filename = os.path.join(test_dir, filename)
        full_filenames.append(full_filename)

    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        print('---------------------------', filename, '---------------------------')
        # get filenum
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]
        # set root from xml
        tree = ET.parse(filename)
        root = tree.getroot()
        # find img directory
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)
        # Load Image, get height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Get ground truths
        all = root.findall('object')
        ground_truths = []
        for object in all:
            # get name, bndbox for labels and bbox
            name = object.find('name')
            bndbox = object.find('bndbox')
            # set test label to name.text (mask or nomask)
            test_label = name.text
            bbox = []
            for element in bndbox:
                bbox.append(int(element.text))
            xmin, ymin, xmax, ymax = bbox
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            color = (0, 0, 255)
            thickness = 2
            cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)
            test_bbox = [bbox[0] / width, bbox[1] / height, bbox[2] / width, bbox[3] / height]
            ground_truths.append([test_label, test_bbox])
        #print('ground_truths: ', ground_truths)

        for ground_truth in ground_truths:
            with open("./mAP/groundtruths/{}.txt".format(filenum), "a+") as file:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)

        # Latency calculation
        mask_start_time = time.time()
        interpreter.invoke()
        mask_end_time = time.time()
        total_maskdetection_time += mask_end_time - mask_start_time
        mask_detection_count += 1

        objs, count = get_output(interpreter)
        # score_threshold=args.threshold, top_k=args.top_k)
        print('detection result count:', len(objs), 'count: ', count)
        print(objs)

        for i in range(count):
            #if objs[i].id != 0 and objs[i].id != 1:
            #    continue
            #if objs[i].score > 1:
            #    continue
            obj_bbox = list(objs[i].bbox)
            #if any(edge > 1 for edge in obj_bbox):
            #    continue
            #if any(np.isnan(edge) for edge in obj_bbox):
            #    continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = int(xmin * width), int(ymin * height), int(xmax * width), int(ymax * height)
            unnorm = [xmin, ymin, xmax, ymax]
            #print(xmin, ymin, xmax, ymax)
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            color = (255, 0, 0)
            thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)

            if objs[i].id == 0:
                label = "nomask"
            elif objs[i].id == 1:
                label = "mask"
            score = objs[i].score
            #print(obj_bbox, label, score)

            with open("./mAP/1NN_CPU_8bit_detections/{}.txt".format(filenum), "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

        window_name = 'image'
        #cv2.imshow(window_name, cv2_im)
        #cv2.waitKey()

    avg_mask = total_maskdetection_time / mask_detection_count
    print('Average Total Inference Time: ', avg_mask)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument(
        '--top_k', type=int, default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open('face_output.csv', 'w')
    with f:
        fnames = ['timestamp', 'idx', 'label', 'width', 'height',
                  'xmin', 'ymin', 'xmax', 'ymax', 'score']
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        for image_path in sorted(
                glob.glob('/home/mendel/dataset/Store/frames/Camera01/*.jpg')):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)

            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                score = obj.score
                label = 'face'
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })
def main():
    default_model_dir = '/Users/octavian/Projects/Python3_projects/cars-counting/all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument(
        '--top_k', type=int, default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    # interpreter = tflite.Interpreter(args.model, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    interpreter = tf.lite.Interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    detection_threshold = 0.5
    dist_estimator = ForwardDistanceEstimator()
    dist_estimator.load_scalers('./extra/scaler_x.save', './extra/scaler_y.save')
    dist_estimator.load_model(
        '/Users/octavian/Projects/Python3_projects/cars-counting/all_models/[email protected]',
        '/Users/octavian/Projects/Python3_projects/cars-counting/all_models/[email protected]')
    frames_until_reset = 0
    csv_columns = ["Number", "Type", "Date"]

    cap = cv2.VideoCapture(0)
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    # out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (640, 352))
    ct = CentroidTracker()

    with open("output_" + datetime.datetime.today().strftime('%Y-%m-%d') + ".csv", "w") as output_file:
        writer = csv.DictWriter(output_file, fieldnames=csv_columns)
        writer.writeheader()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            cv2_im = frame
            frames_until_reset += 1
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            (h, w) = cv2_im.shape[:2]
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs, boxes, classes, scores, count = get_output(
                interpreter, score_threshold=args.threshold, top_k=args.top_k)

            boxes = np.squeeze(boxes)
            classes = np.squeeze(classes).astype(np.int32)
            scores = np.squeeze(scores)
            for ind in range(len(boxes)):
                if scores[ind] > detection_threshold and (
                        classes[ind] == 2 or classes[ind] == 7 or
                        classes[ind] == 3 or classes[ind] == 0):
                    box = boxes[ind] * np.array([h, w, h, w])
                    box = np.append(box, classes[ind])
                    (startY, startX, endY, endX, label) = box.astype("int")
                    distance = dist_estimator.predict_distance(startX, startY, endX, endY)
                    cv2.putText(img=cv2_im, text=str(distance),
                                org=(startX + 30, startY + 30),
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=1e-3 * frame.shape[0],
                                color=(255, 255, 255), thickness=2)
                    cv2.rectangle(cv2_im, (startX, startY), (endX, endY), (0, 255, 0), 2)

            cv2.imshow('Output', cv2_im)
            cv2.waitKey(1)
            # out.write(frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    cap.release()
    # out.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # Blink/yawn state (assumed local initialisation; the original never sets these
    # before first use, which would raise UnboundLocalError)
    COUNTER = 0
    yawns = 0
    yawnStatus = False
    ALARM_ON = False

    cap = cv2.VideoCapture(1)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = imutils.resize(frame, width=640)
        gray = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2GRAY)
        prev_yawn_status = yawnStatus

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        rects = detector(gray, 0)
        for rect in rects:
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            mouth = shape[mStart:mEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            mouEAR = mouth_aspect_ratio(mouth)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouthHull = cv2.convexHull(mouth)

            if ear < EYE_AR_THRESH:
                COUNTER += 1
                cv2.putText(cv2_im, "Eyes Closed ", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    cv2.putText(cv2_im, "DROWSINESS ALERT!", (10, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    if not ALARM_ON:
                        ALARM_ON = True
                        threadStatusQ.put(not ALARM_ON)
                        thread = Thread(target=soundAlert, args=(sound_path, threadStatusQ,))
                        thread.setDaemon(True)
                        thread.start()
                else:
                    ALARM_ON = False
            else:
                COUNTER = 0
                cv2.putText(cv2_im, "Eyes Open ", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if mouEAR > MOU_AR_THRESH:
                cv2.putText(cv2_im, "Yawning ", (10, 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                yawnStatus = True
                output_text = "Yawn Count: " + str(yawns + 1)
                cv2.putText(cv2_im, output_text, (10, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            else:
                yawnStatus = False
            if prev_yawn_status == True and yawnStatus == False:
                yawns += 1

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = './all_models'
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=default_model)
                        #default=os.path.join(default_model_dir, default_model))
    #################### Keondo's Modification #########################
    default_model2 = 'mask_detector_quant.tflite'
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    #################### Keondo's Modification #########################
    parser.add_argument('--labels', help='label file path',
                        default=default_labels)
                        #default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    # Initialize and configure pyttsx3 for warning messages
    #engine = pyttsx3.init()
    #rate = engine.getProperty('rate')
    #engine.setProperty('rate', rate - 50)

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    #interpreter = common.make_interpreter(args.model)
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    #################### Keondo's Modification #########################
    #interpreter2 = common.make_interpreter(args.model2)
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()
    print('Interpreter 2 loaded')
    #################### Keondo's Modification #########################

    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)
    frame_no = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #################### Keondo's Modification #########################
        #print('Interpreter 2 processing start')
        height, width, channels = cv2_im.shape
        noMaskCount = 0
        mask_data = []
        i = 0

        pil_im2 = pil_im.resize((224, 224), resample=Image.NEAREST)
        tensor_index = interpreter2.get_input_details()[0]['index']
        # Set the classifier input before invoking it (assumed fix: the original called
        # set_tensor after invoke with an undefined tensor_index and a PIL image;
        # set_tensor expects a numpy array with a batch dimension).
        input_data = np.expand_dims(np.asarray(pil_im2), 0).astype(
            interpreter2.get_input_details()[0]['dtype'])
        interpreter2.set_tensor(tensor_index, input_data)
        #set_input2 = interpreter2.tensor(tensor_index)()
        #input_tensor2(interpreter2)[:,:] = pil_im2
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        interpreter2.invoke()
        output_details = interpreter2.get_output_details()[0]
        output_data = np.squeeze(interpreter2.tensor(output_details['index'])())

        """
        #for obj in objs:
        for i in range(len(objs)-1, -1, -1):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])
            print("Bf NN: ", frame_no, i, x0, y0)
            common.set_input2(interpreter2, pil_im2)
            output_data = common.output_tensor2(interpreter2)
            interpreter2.invoke()
            print("Af NN: ", frame_no, i, x0, y0)
            print("Output data: ", output_data)
            mask_data.append((len(objs) - 1 - i, output_data))
            #qi += 1

        j = 0
        #for obj in objs:
        for j in range(len(objs)):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)
            print("2nd loop: ", frame_no, j, x0, y0)
            print(list(filter(lambda x: x[0] == j, mask_data)))
            output = list(filter(lambda x: x[0] == j, mask_data))
            mask, withoutMask = output[0][1]
            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")"
                color = (255, 0, 0)  # blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"
                color = (0, 0, 255)  # red
                noMaskCount += 1
            labelMask = "{}: {:.2f}%".format(labelMask, max(mask, withoutMask) * 100)
            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0-10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
            #j += 1
        """

        frame_no += 1
        #if noMaskCount > 0:
        #    engine.say("There are " + str(noMaskCount) + " people not wearing masks. Please wear a mask")

        #tensor_index = interpreter2.get_input_details()[0]['index']
        #set_input2 = interpreter2.tensor(tensor_index)()
        #input_tensor2(interpreter2)[:,:] = pil_im2
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #set_input2(pil_im2)
        #interpreter2.set_tensor(tensor_index, pil_im2)
        #output_details = interpreter2.get_output_details()[0]
        #output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        """
        There is at least 1 reference to internal data in the interpreter
        in the form of a numpy array or slice. Be sure to only hold the
        function returned from tensor() if you are using raw data access.
        """
        #print('Interpreter 2 Output data')
        #print(output_data)
        #if 'quantization' in output_details:
        #    print('quantization')
        #    print(output_details['quantization'])
        #elif 'quantization_parameters' in output_details:
        #    print('quantization_parameters')
        #    print(output_details['quantization_parameters'])
        #else:
        #    print('No quantization')
        #scales, zero_points, quantized_dimension = output_details['quantization_parameters']
        #if scales == 0:
        #    objs2 = output_data - zero_points
        #else:
        #    objs2 = scales * (output_data - zero_points)
        #print('Check objs2')
        #print(objs2)
        #################### Keondo's Modification #########################

        cv2.imshow('frame', cv2_im)
        #engine.runAndWait()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument(
        '--top_k', type=int, default=10,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.3,
                        help='classifier score threshold')
    parser.add_argument('--class_ids', nargs='*', type=int, default=0,
                        help='Array of class id')
    parser.add_argument('--input_files', default='/home/mendel/dataset/*.jpg',
                        help='Input files')
    parser.add_argument('--csv_out', default='detect_output.csv',
                        help='csv output file')
    args = parser.parse_args()

    if args.class_ids == 0:
        args.class_ids = [0]

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open(args.csv_out, 'w')
    with f:
        fnames = ['timestamp', 'idx', 'label', 'width', 'height',
                  'xmin', 'ymin', 'xmax', 'ymax', 'score']
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        inference_time = []
        for image_path in sorted(glob.glob(args.input_files)):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            # inference
            start = time.time()
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter, score_threshold=args.threshold,
                              top_k=args.top_k, class_list=args.class_ids)
            inference_time.append(time.time() - start)

            # return results
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                score = obj.score
                label = labels.get(obj.id, obj.id)
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })

    print("Inference time : {:.3f} ms".format(
        sum(inference_time) * 1000 / len(inference_time)))
    print("Frames per second : {:.2f} fps".format(
        len(inference_time) / sum(inference_time)))