Example #1
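# Grabs a few warm-up frames from the default camera, runs the detector on the last one, briefly shows the annotated frame, and returns the results as (percent, label) pairs.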
def detect_objects(threshold=0.1, top_count=3):
    interpreter = common.make_interpreter(default_model)
    interpreter.allocate_tensors()
    labels = load_labels(default_labels)
    cap = cv2.VideoCapture(default_camera_idx)
    
    if cap.isOpened():
        for i in range(0,15):
            ret, frame = cap.read()
            time.sleep(1/1000)
            if not ret:
                break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=threshold, top_k=top_count)

        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2.imshow('detect', cv2_im)
        cv2.waitKey(50)
        
        def make(obj):        
            return Result(
                percent = int(100 * obj.score),
                label = labels.get(obj.id, 'unknown')
            )
        cap.release()
        return [make(obj) for obj in objs]
Example #2
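 # Per-frame callback: runs detection, feeds the boxes and scores to the MOT tracker when one is provided, and returns an SVG overlay with inference time and FPS.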
 def user_callback(input_tensor, src_size, inference_box, mot_tracker):
     nonlocal fps_counter
     start_time = time.monotonic()
     common.set_input(interpreter, input_tensor)
     interpreter.invoke()
     # For larger input image sizes, use the edgetpu.classification.engine for better performance
     objs = get_output(interpreter, args.threshold, args.top_k)
     end_time = time.monotonic()
     detections = []  # np.array([])
     for n in range(0, len(objs)):
         element = []  # np.array([])
         element.append(objs[n].bbox.xmin)
         element.append(objs[n].bbox.ymin)
         element.append(objs[n].bbox.xmax)
         element.append(objs[n].bbox.ymax)
         element.append(objs[n].score)  # print('element= ',element)
         detections.append(element)  # print('dets: ',dets)
     # convert to numpy array #      print('npdets: ',dets)
     detections = np.array(detections)
     trdata = []
     trackerFlag = False
     text_lines = []
     if detections.any():
         if mot_tracker is not None:
             trdata = mot_tracker.update(detections)
             trackerFlag = True
         text_lines = [
             'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
             'FPS: {} fps'.format(round(next(fps_counter))), ]
     if len(objs) != 0:
         return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines, trdata, trackerFlag)
Example #3
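# Runs detection over every .jpeg under args.images and writes annotated copies to results/<dirname>/.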
def detect_objects(args):
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)
    dirname = args.images

    dirpath = Path('results/' + dirname)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    Path("results/" + dirname).mkdir(parents=True, exist_ok=True)

    for filename in glob.glob(dirname + "/*.jpeg"):
        print(filename)
        name = os.path.basename(filename)
        pil_im = Image.open(filename)
        open_cv_image = np.array(pil_im)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        #print(objs)
        open_cv_image = append_objs_to_img(open_cv_image, objs, labels)
        cv2_im_rgb = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)
        (flag, encodedImage) = cv2.imencode(".jpeg", cv2_im_rgb)
        #print(flag)
        #print(encodedImage)
        with open("./results/" + dirname + "/" + name, "wb") as f:
            f.write(encodedImage)
Example #4
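    # Per-frame callback: runs the hand-detection model, then resizes the raw Gst buffer to 224x224 for the hand-classification model, and returns an SVG overlay when display is enabled.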
    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()

        # Run hand detection
        common.set_input(detection_interpreter, input_tensor)
        detection_interpreter.invoke()
        detection_results = get_detection_output(detection_interpreter)

        # Resize image and set as input
        buf = input_tensor
        _, map_info = buf.map(Gst.MapFlags.READ)
        np_input = np.ndarray(shape=(h, w, 3),
                              dtype=np.uint8,
                              buffer=map_info.data)
        pil_input = Image.fromarray(np_input)
        pil_input = pil_input.resize((224, 224), Image.NEAREST)
        np_input = np.asarray(pil_input)
        common.input_tensor(classification_interpreter)[:, :] = np_input

        # Run hand classification
        classification_interpreter.invoke()
        classification_results = get_classification_output(
            classification_interpreter)

        end_time = time.monotonic()

        if show_display:
            return generate_svg(src_size, detection_results,
                                classification_results)
        return
Example #5
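# Classifies webcam frames in a loop and overlays the top label on the preview window.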
def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--model', help='File path of .tflite model.', default='inception_v4_299_quant_edgetpu.tflite')
  parser.add_argument(
      '--labels', help='File path of labels file.', default='imagenet_labels.txt')
  parser.add_argument(
      '--top_k', help='Number of classifications to list', type=int, default=1)
  args = parser.parse_args()

  print('Initializing TF Lite interpreter...')
  
  interpreter = common.make_interpreter(os.path.join(default_model_dir,args.model))
  interpreter.allocate_tensors()
  labels = load_labels(os.path.join(default_model_dir, args.labels))
  cap = cv2.VideoCapture(0)
  
  while True:
    ret, frame = cap.read()
    if not ret:
      break
    cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    common.set_input(interpreter, pil_im)
    
    results = classify_image(interpreter, pil_im, args.top_k)
    for label_id, prob in results:
      cv2.putText(frame, labels[label_id], (5,35), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,0,0), 2)
      print('%s: %.5f' % (labels[label_id], prob))

    cv2.imshow('Classification', frame)
    if cv2.waitKey(1) == ord('q'):
      break

  cap.release()
  cv2.destroyAllWindows()
Example #6
    def get_frame(self):
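        # Reads one frame, runs detection, and writes "1" or "0" to the flag file for each detection depending on whether it is a person with score above 0.6.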
        if self.video.isOpened() :
            ret, frame = self.video.read()
            cv2_im = frame
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            common.set_input(self.interpreter, pil_im)
            self.interpreter.invoke()
            objs = get_output(self.interpreter, score_threshold=threshold, top_k=top_k)
            cv2_im = append_objs_to_img(cv2_im, objs, self.labels)

            # cv2.imshow('frame', cv2_im)
            for result in objs:
                label = '{:.0f}% {}'.format(100*result.score, self.labels.get(result.id, result.id))
                if self.labels.get(result.id) == "person" and result.score > 0.6:
                    self.file.write("1")
                    self.file.seek(0)
                else:
                    self.file.write("0")
                    self.file.seek(0) 
                print(label)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return
            sleep(0.2)
            return frame
Example #7
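# Standard camera detection loop: parses command-line options, loads the model and labels, and annotates frames from the chosen camera until 'q' is pressed.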
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #cv2.imshow('frame', cv2_im)
        #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        #pil_im = Image.fromarray(cv2_im_rgb)
        #handle_image_conversion(pil_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #8
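# Camera detection loop that undistorts each frame, skips frames with more than max_objs detections, and pushes DetectedObject protobuf messages onto the shared detected_objects queue.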
def start_detector(args, interpreter, labels, camera_res):
    """ Detect max_objs objects from camera frames. """
    detected_objects.clear()

    try:
        cap = cv2.VideoCapture(args.camera_idx)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            cv2_im = frame

            cv2_im_u = cv2.undistort(cv2_im, common.CAMERA_MATRIX,
                                     common.DIST_COEFFS)

            cv2_im_u_rgb = cv2.cvtColor(cv2_im_u, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_u_rgb)

            common.set_input(interpreter, pil_im)
            interpreter.invoke()

            objs = common.get_output(interpreter,
                                     score_threshold=args.threshold,
                                     labels=labels)

            # Reject images with number of detected objects > max_objs.
            if len(objs) > args.max_objs:
                continue

            # Create proto buffer message and add to stack.
            for obj in objs:
                detected_object = detection_server_pb2.DetectedObject(
                    label=obj.label,
                    score=obj.score,
                    area=obj.area,
                    centroid=detection_server_pb2.DetectedObject.Centroid(
                        x=obj.centroid.x, y=obj.centroid.y),
                    bbox=detection_server_pb2.DetectedObject.BBox(
                        xmin=obj.bbox.xmin,
                        ymin=obj.bbox.ymin,
                        xmax=obj.bbox.xmax,
                        ymax=obj.bbox.ymax))
                detected_objects.appendleft(detected_object)

            if args.display:
                cv2_im_u = common.annotate_image(objs, camera_res, cv2_im_u)
                cv2.imshow('frame', cv2_im_u)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    except cv2.error as e:
        print('cv2 error: {}'.format(e))
    finally:
        cap.release()
        cv2.destroyAllWindows()

    return
Example #9
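    # Per-frame callback: detects faces, crops each detection from the input tensor, classifies the crop with the FER (emotion) interpreter, and attaches the predicted emotion id/score before rendering the SVG overlay.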
    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(face_interpreter, input_tensor)
        face_interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(face_interpreter, args.threshold, args.top_k)
        # Get face detected part
        from PIL import Image
        im = Image.fromarray(common.input_tensor(face_interpreter))
        src_w, src_h = src_size
        inf_w, inf_h = inference_size
        results = []
        emo_objs = []
        for obj in objs:
            x0, y0, x1, y1 = list(obj.bbox)
            # Relative coordinates.
            x, y, w, h = x0, y0, x1 - x0, y1 - y0
            # Absolute coordinates, input tensor space.
            x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(
                h * inf_h)
            crop_rectangle = (x, y, x + w, y + h)
            # get face
            face = im.crop(crop_rectangle)
            face = np.array(face)
            # convert to grayscale
            #face = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)
            print(face.shape)
            face = cv2.resize(face, (224, 224))
            face = face.astype(np.uint8)
            #face /= float(face.max())
            face = np.reshape(face.flatten(), (224, 224, 3))
            # invoke fer interpreter
            common.set_input2(fer_interpreter, face)
            fer_interpreter.invoke()
            # process results
            results = get_emotion(fer_interpreter)
            if len(results) > 0:
                setattr(obj, "id", results[0].id)
                setattr(obj, "score", results[0].score)
                emo_objs.append(obj)
        objs = emo_objs
        end_time = time.monotonic()

        text_lines = []
        if len(objs) > 0:
            text_lines = [
                'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
                'FPS: {} fps'.format(round(next(fps_counter))),
            ]
            for result in results:
                text_lines.append('score={:.2f}: {}'.format(
                    result.score, labels.get(result.id, result.id)))
            #print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines)
Example #10
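    # Monitors the camera stream for duration_sec seconds, writes a timestamped .mp4, and returns True if a person was detected.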
    def detect(self, duration_sec):
        if self.error:
            return False
        human_detected = False

        labels_path = os.path.join(self.model_directory, self.labels_file)
        labels = self.load_labels(labels_path)
        utc_timestamp = round(datetime.datetime.now().replace(
            tzinfo=datetime.timezone.utc).timestamp())
        video_name = os.path.join(
            self.log_directory,
            'iot-hub-detect-' + str(utc_timestamp) + '.mp4')
        expire_time = utc_timestamp + duration_sec
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_name, fourcc, 24.0, (640, 480))
        print('Monitoring: ' + self.camera_stream_url + ' for ' +
              str(duration_sec) + ' seconds...')
        while utc_timestamp < expire_time or out != None:
            ret, cv2_im = self.stream.read()
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)

            common.set_input(self.interpreter, pil_im)
            self.interpreter.invoke()
            objs = self.get_output(self.interpreter,
                                   score_threshold=self.score_threshold,
                                   top_k=self.top_k)
            person_detected, cv2_im = self.append_objs_to_img(
                cv2_im, objs, labels)

            if person_detected:
                if person_detected != human_detected:
                    print('HUMAN DETECTED @ ' + str(utc_timestamp))
                human_detected = True
            if out != None:
                if expire_time <= utc_timestamp:
                    print('Finished writing ' + video_name)
                    out.release()
                    out = None
                    break
                else:
                    out.write(cv2_im)
            else:
                break
            cv2.imshow(self.camera_stream_url, cv2_im)
            utc_timestamp = round(datetime.datetime.now().replace(
                tzinfo=datetime.timezone.utc).timestamp())
            if cv2.waitKey(1) & 0xFF == ord('q'):
                if out != None:
                    out.release()
                    out = None
                break

        self.stream.release()
        cv2.destroyAllWindows()
        return human_detected
Example #11
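    # Per-frame callback: classifies the frame and, depending on house/parcel mode and a GPIO override, sets the global access flag and quits the GTK main loop.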
    def user_callback(input_tensor, src_size, inference_box):
        global access
        global house
        global parcel
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        results = get_output(interpreter, args.top_k, args.threshold)
        end_time = time.monotonic()
        text_lines = [
            ' ',
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        for result in results:
            text_lines.append('score={:.2f}: {}'.format(
                result.score, labels.get(result.id, result.id)))
            if gpio6.read() == True:
                access = 2
                Gtk.main_quit()
            elif house:
                if labels.get(
                        result.id, result.id
                ) == "tree frog, tree-frog" and result.score > 0.3:
                    access = 1
                    Gtk.main_quit()
                elif (labels.get(result.id, result.id) == "acoustic guitar"
                      or labels.get(result.id, result.id) == "jigsaw puzzle"
                      or labels.get(result.id, result.id) == "jellyfish"
                      or labels.get(result.id, result.id) == "basketball"
                      or labels.get(result.id, result.id)
                      == "soccer ball") and result.score > 0.3:
                    access = 0
                    Gtk.main_quit()
            elif parcel:
                if labels.get(
                        result.id,
                        result.id) == "acoustic guitar" and result.score > 0.3:
                    access = 1
                    Gtk.main_quit()
                elif (labels.get(result.id,
                                 result.id) == "tree frog, tree-frog"
                      or labels.get(result.id, result.id) == "jigsaw puzzle"
                      or labels.get(result.id, result.id) == "jellyfish"
                      or labels.get(result.id, result.id) == "basketball"
                      or labels.get(result.id, result.id)
                      == "soccer ball") and result.score > 0.3:
                    access = 0
                    Gtk.main_quit()

        print(' '.join(text_lines))
        return generate_svg(src_size, text_lines)
Example #12
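# Receives frames over imagezmq (PUB/SUB), runs detection on each one, and shows the annotated result in a full-screen window.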
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=str, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # imagezmq receiver
    image_hub = imagezmq.ImageHub(open_port='tcp://147.47.200.65:35556', REQ_REP=False) # REQ_REP=False: use PUB/SUB (non-block)

    #cap = cv2.VideoCapture(args.camera_idx)

    while True:
        # receive from zmq
        timestamp, frame = image_hub.recv_image()
        dt = datetime.fromtimestamp(timestamp)
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        start = time.monotonic()
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        inference_time = time.monotonic() - start
        inference_time = 'Inference time: %.2f ms (%.2f fps)' % (inference_time * 1000, 1.0 / inference_time)

        cv2_im = append_objs_to_img(cv2_im, objs, labels, inference_time, dt)
        #cv2_im = cv2.resize(cv2_im, (720, 720))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
        cv2.imshow("frame", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
Example #13
def detect(frame, interpreter, labels, threshold, k):
    """ detects objects in each frame
        returns the frame and a list of objects (boundary box) detected """
    cv2_im = frame
    cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)

    common.set_input(interpreter, pil_im)
    interpreter.invoke()
    objs = get_output(interpreter, score_threshold=threshold, top_k=k)
    cv2_im = append_objs_to_img(cv2_im, objs, labels)

    return cv2_im, objs
Example #14
def detect(frame, interpreter, labels, threshold, k):
    """ Detects objects in each frame using an interpreter engine.
        Returns the editted frame with bounding boxes added and a list of objects detected. """
    cv2_im = frame
    cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)

    common.set_input(interpreter, pil_im)
    interpreter.invoke()
    objs = get_output(interpreter, score_threshold=threshold, top_k=k)
    cv2_im = append_objs_to_img(cv2_im, objs, labels)

    return cv2_im, objs
Example #15
 def user_callback(input_tensor, src_size, inference_box):
   nonlocal fps_counter
   start_time = time.monotonic()
   common.set_input(interpreter, input_tensor)
   interpreter.invoke()
   # For larger input image sizes, use the edgetpu.classification.engine for better performance
   objs = get_output(interpreter, args.threshold, args.top_k)
   end_time = time.monotonic()
   text_lines = [
       'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
       'FPS: {} fps'.format(round(next(fps_counter))),
   ]
   print(' '.join(text_lines))
   return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
Example #16
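# MJPEG generator for a streaming server: feeds each frame to a regression model whose output tensor holds (sin, cos) of an angle, overlays the smoothed values, and publishes the angle via sd.putNumber.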
def gen_frames():  # generate frame by frame from camera
    #Current regression model is unstable, so we take running averages of sin, cos, and angle to stabilize the values
    moving_window = 10
    runsin = np.zeros(moving_window)
    runcos = np.zeros(moving_window)
    runtheta = np.zeros(moving_window)

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        #Uncomment the following if you need to use bounding boxes or classification schemes and to use it for overlay

        #objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        #cv2_im = append_objs_to_img(cv2_im, objs, labels)
        sincos = common.output_tensor(interpreter, 0)
        runsin = np.roll(runsin, 1)
        runsin[0] = sincos[0]
        runcos = np.roll(runcos, 1)
        runcos[0] = sincos[1]
        runtheta = np.roll(runtheta, 1)
        runtheta[0] = 180 / np.pi * np.arctan2(sincos[0], sincos[1])
        cv2_im = cv2.putText(cv2_im, 'angle: {}'.format(np.average(runtheta)),
                             (0, 480 - 90), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                             (255, 0, 0), 2)
        cv2_im = cv2.putText(cv2_im, 'sin: {}'.format(np.average(runsin)),
                             (0, 480 - 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                             (255, 0, 0), 2)
        cv2_im = cv2.putText(cv2_im, 'cos: {}'.format(np.average(runcos)),
                             (0, 480 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                             (255, 0, 0), 2)
        sd.putNumber(
            "WOF Angle",
            180 / np.pi * np.arctan2(np.average(runsin), np.average(runcos)))
        ret, buffer = cv2.imencode('.jpg', frame)
        frame = buffer.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n'
               )  # concat frame one by one and show result

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example #17
def get_features(interpreter, patches):
    
    # patches is a list of ndarray objects; either convert them into a tensor of equal dimensions or feed them in one image at a time
    features = []
    for patch in patches:
        if 0 in patch.shape:
            features.append(None)
            continue

        common.set_input(interpreter, Image.fromarray(patch))
        interpreter.invoke()
        feature = common.output_tensor(interpreter, 0)
        features.append(feature)

    return features
Example #18
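# Detection loop for a streaming server: reads from a local camera or a network source, updates the shared outputFrame under a lock, and periodically saves snapshots via take_snapshot().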
def detect_object(args):
    global outputFrame, lock

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    if args.videosrc=='dev': 
        cap = cv2.VideoCapture(args.camera_idx)
        
    else:
        if args.netsrc==None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)        

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        if args.displayBool == 'True':
            cv2.imshow('frame', cv2_im)

        # acquire the lock, set the output frame, and release the
        # lock
        with lock:
            outputFrame = cv2_im.copy()


        if (time.time() - last_save) >=1:
            take_snapshot(snapshot_im, objs, labels, exclude=args.exclude.split(','), include=args.include.split(','))
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #19
 def user_callback(input_tensor, src_size, inference_box):
     nonlocal fps_counter
     start_time = time.monotonic()
     common.set_input(interpreter, input_tensor)
     interpreter.invoke()
     # For larger input image sizes, use the edgetpu.classification.engine for better performance
     results = get_output(interpreter, args.top_k, args.threshold)
     #   print(src_size, results, input_tensor )
     end_time = time.monotonic()
     text_lines = [
         ' ',
         'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
         'FPS: {} fps'.format(round(next(fps_counter))),
     ]
     for result in results:
         text_lines.append('score={:.2f}: {}'.format(
             result.score, labels.get(result.id, result.id)))
     print(' '.join(text_lines))
     return generate_svg(src_size, text_lines)
Example #20
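# Capture/inference/tracking loop: flips each camera frame, runs detection, and hands the results to track_object() while printing the measured FPS.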
def main():

    if edgetpu == 1:
        mdl = model_edgetpu
    else:
        mdl = model

    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)
    
    fps=1
   
    while True:
        start_time=time.time()
        
        #----------------Capture Camera Frame-----------------
        ret, frame = cap.read()
        if not ret:
            break
        
        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
       
        #-------------------Inference---------------------------------
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter, score_threshold=threshold, top_k=top_k)
        
        #-----------------other------------------------------------
        track_object(objs,labels)#tracking  <<<<<<<
       
        fps = round(1.0 / (time.time() - start_time),1)
        print("*********FPS: ",fps,"************")

        #-----------------------------------------------------
        

    cap.release()
    cv2.destroyAllWindows()
Example #21
 def user_callback(input_tensor, src_size, inference_box):
   nonlocal fps_counter
   start_time = time.monotonic()
   common.set_input(interpreter, input_tensor)
   interpreter.invoke()
   # For larger input image sizes, use the edgetpu.classification.engine for better performance
   objs = get_output(interpreter, args.threshold, args.top_k)
   # Get face detected part
   from PIL import Image
   im = Image.fromarray(common.input_tensor(interpreter))
   src_w, src_h = src_size
   inf_w, inf_h = inference_size
   results = []
   for obj in objs:
     x0, y0, x1, y1 = list(obj.bbox)
     # Relative coordinates.
     x, y, w, h = x0, y0, x1 - x0, y1 - y0
     # Absolute coordinates, input tensor space.
     x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(h * inf_h)
     crop_rectangle = (x, y, x+w, y+h)
     face_part = im.crop(crop_rectangle)
     # invoke fer interpreter
     common.set_input2(interpreter_fer, face_part)
     interpreter_fer.invoke()
     results = get_output2(interpreter_fer, args.top_k, args.threshold)
     if len(results) > 0:
         setattr(obj, "id", results[0].id)
         setattr(obj, "score", results[0].score)
   
   end_time = time.monotonic()
   text_lines = []
   text_lines = [
       'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
       'FPS: {} fps'.format(round(next(fps_counter))),
   ]
   for result in results:
       text_lines.append('score={:.2f}: {}'.format(result.score, labels.get(result.id, result.id)))
   print(' '.join(text_lines))
   return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
Example #22
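# Grabs a few warm-up frames from the camera, classifies the last one, overlays the top labels, and returns the results as (label, percent) pairs.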
def classify(model_type=ModelType.General, top_k=1):
    interpreter = common.make_interpreter(model_type.model_path())
    interpreter.allocate_tensors()
    labels = load_labels(model_type.label_path())
    cap = cv2.VideoCapture(0)
    
    if cap.isOpened():
        for i in range(0,15):
            ret, frame = cap.read()
            time.sleep(1/1000)
            if not ret:
                break
        cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        
        results = classify_image(interpreter, pil_im, top_k)
        
        for label_id, prob in results:
            cv2.putText(cv2_im_rgb, labels[label_id], (5,35), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,0,0), 2)
            print('%s: %.5f' % (labels[label_id], prob))
        
        cv2.imshow('Classification', cv2_im_rgb)
        cv2.waitKey(50)
        
        def make(obj):
            fs = "{0}({1})"
            parsed = parse.parse(fs, labels[obj[0]])
            if parsed != None and len(parsed.fixed) > 1:
                tLabel = parsed[1]
            else:
                tLabel = labels[obj[0]]
            return Result(
                label = tLabel,
                percent = int(100 * obj[1])
                )
        cap.release()
        return [make(obj) for obj in results]
Example #23
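# Offline evaluation of a two-stage pipeline (face detection followed by mask classification): reads Pascal VOC-style XML ground truth, writes ground-truth and detection files for mAP scoring, and reports the average latency of each stage.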
def main():
    #default_model_dir = './all_models'

    # Set face detection model
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite' # Coral ver
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite' # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=default_model)

    # Set mask classification model
    default_model2 = 'mask_detector_quant.tflite'  # GPU ver
    #default_model2 = 'mask_detector_quant_edgetpu.tflite' #Coral ver
    parser.add_argument('--model2',
                        help='.tflite model path',
                        default=default_model2)

    parser.add_argument('--labels',
                        help='label file path',
                        default=default_labels)

    #parser.add_argument('--top_k', type=int, default=3,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    # Load 2NN
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)

    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    filenames = os.listdir(test_dir)
    full_filenames = []
    for filename in filenames:
        full_filename = os.path.join(test_dir, filename)
        full_filenames.append(full_filename)

    total_facedetection_time = 0
    face_detection_count = 0

    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        #print(f'---------------------------', filename, '---------------------------')
        # get filenum
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]

        # set root from xml
        tree = ET.parse(filename)
        root = tree.getroot()

        # find img directory
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)

        # Load Image, get height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Get ground truths
        all = root.findall('object')
        ground_truths = []
        for object in all:
            # get name, bndbox for labels and bbox
            name = object.find('name')
            bndbox = object.find('bndbox')

            # set test label to name.text (mask or nomask)
            test_label = name.text
            bbox = []
            for element in bndbox:
                bbox.append(int(element.text))
            xmin, ymin, xmax, ymax = bbox
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (0, 0, 255)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)
            test_bbox = [
                bbox[0] / width, bbox[1] / height, bbox[2] / width,
                bbox[3] / height
            ]

            ground_truths.append([test_label, test_bbox])

        #print('ground_truths: ', ground_truths)

        for ground_truth in ground_truths:
            with open("./mAP/groundtruths/{}.txt".format(filenum),
                      "a+") as file:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)

        # Latency calculation
        detect_start_time = time.time()
        interpreter.invoke()
        detect_end_time = time.time()
        total_facedetection_time += detect_end_time - detect_start_time
        face_detection_count += 1

        objs = get_output(
            interpreter)  #score_threshold=args.threshold, top_k=args.top_k)
        #print('detection result:', objs)

        for i in range(len(objs)):
            if objs[i].id != 0:
                continue
            if objs[i].score > 1:
                continue
            obj_bbox = list(objs[i].bbox)
            if any(edge > 1 for edge in obj_bbox):
                continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = int(xmin * width), int(
                ymin * height), int(xmax * width), int(ymax * height)
            unnorm = [xmin, ymin, xmax, ymax]
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (255, 0, 0)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)

            pil_im2 = Image.fromarray(cv2_im_rgb[ymin:ymax, xmin:xmax])
            common.set_input2(interpreter2, pil_im2)

            # Latency calculation
            mask_start_time = time.time()
            interpreter2.invoke()
            mask_end_time = time.time()
            output_data = common.output_tensor2(interpreter2)

            total_maskdetection_time += mask_end_time - mask_start_time
            mask_detection_count += 1

            # print(output_data)
            mask = output_data[0]
            withoutMask = output_data[1]
            print('mask_percentage: ', mask, ', nomask_percentage: ',
                  withoutMask)

            if mask > withoutMask:
                label = "mask"
                score = mask * objs[i].score
            else:
                label = "nomask"
                score = withoutMask * objs[i].score
            #print(obj_bbox, label, score)

            with open("./mAP/2NN_CPU_8bit_detections/{}.txt".format(filenum),
                      "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

        #window_name = 'Image'
        #cv2.imshow(window_name, cv2_im)
        #cv2.waitKey()

        #print('-------------------------------next file----------------------------------------------------------')

    avg_face = total_facedetection_time / face_detection_count
    avg_mask = total_maskdetection_time / mask_detection_count
    print('Average Face Detection Time: ', avg_face)
    print('Average Mask Detection Time: ', avg_mask)
    print('Average Total Inference Time: ', avg_face + avg_mask)
Example #24
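# Capture/inference/tracking loop streamed as MJPEG for Flask, with per-stage timings collected in arr_dur and the measured FPS printed each frame.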
def main():

    if (edgetpu == 1):
        mdl = model_edgetpu
    else:
        mdl = model

    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1
    arr_dur = [0, 0, 0]
    #while cap.isOpened():
    while True:
        start_time = time.time()

        #----------------Capture Camera Frame-----------------
        start_t0 = time.time()
        ret, frame = cap.read()
        if not ret:
            break

        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        arr_dur[0] = time.time() - start_t0
        #cm.time_elapsed(start_t0,"camera capture")
        #----------------------------------------------------

        #-------------------Inference---------------------------------
        start_t1 = time.time()
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter,
                             score_threshold=threshold,
                             top_k=top_k)

        arr_dur[1] = time.time() - start_t1
        #cm.time_elapsed(start_t1,"inference")
        #----------------------------------------------------

        #-----------------other------------------------------------
        start_t2 = time.time()
        track_object(objs, labels)  #tracking  <<<<<<<

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        cv2_im = append_text_img1(cv2_im, objs, labels, arr_dur,
                                  arr_track_data)

        ret, jpeg = cv2.imencode('.jpg', cv2_im)
        pic = jpeg.tobytes()

        #Flask streaming
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + pic + b'\r\n\r\n')

        arr_dur[2] = time.time() - start_t2

        fps = round(1.0 / (time.time() - start_time), 1)
        print("*********FPS: ", fps, "************")

    cap.release()
    cv2.destroyAllWindows()
Example #25
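# Offline evaluation of a single-stage mask detection model: writes ground-truth and detection files for mAP scoring and reports the average inference time.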
def main():
    #default_model_dir = './all_models'
    
    # Set model
    # default_model = './1NN/quantized/one_nn11_edgetpu.tflite' # Coral ver
    default_model = './1NN/quantized/one_nn_det_100_3.tflite' # GPU ver
    default_labels = 'face_labels.txt' 

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default = default_model)                
    
    parser.add_argument('--labels', help='label file path',
                        default = default_labels)

    #parser.add_argument('--top_k', type=int, default=5,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN
    interpreter = tflite.Interpreter(model_path = args.model)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)
    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    filenames = os.listdir(test_dir)
    full_filenames = []
    for filename in filenames:
        full_filename = os.path.join(test_dir, filename)
        full_filenames.append(full_filename)
    
    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        print(f'---------------------------', filename, '---------------------------')
        # get filenum
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]

        # set root from xml
        tree = ET.parse(filename)
        root = tree.getroot()

        # find img directory
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)

        # Load Image, get height and width
        cv2_im = cv2.imread(image_path,1)
        height, width, channels = cv2_im.shape

        # Get ground truths
        all = root.findall('object')
        ground_truths  = []
        for object in all:
            # get name, bndbox for labels and bbox
            name = object.find('name')
            bndbox = object.find('bndbox')

            # set test label to name.text (mask or nomask)
            test_label = name.text
            bbox = []
            for element in bndbox:
                bbox.append(int(element.text))
            xmin, ymin, xmax, ymax = bbox
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            color = (0, 0, 255)
            thickness = 2
            cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)
            test_bbox = [bbox[0]/width, bbox[1]/height, bbox[2]/width, bbox[3]/height]

            ground_truths.append([test_label, test_bbox])
        
        #print('ground_truths: ', ground_truths)

        for ground_truth in ground_truths:
            with open("./mAP/groundtruths/{}.txt".format(filenum), "a+") as file:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)

        # Latency calculation
        mask_start_time = time.time()
        interpreter.invoke()
        mask_end_time = time.time()
        total_maskdetection_time += mask_end_time - mask_start_time
        mask_detection_count += 1
        
        objs, count = get_output(interpreter) # score_threshold=args.threshold, top_k=args.top_k)
        print('number of detection results:', len(objs), 'count:', count)
        print(objs)

        for i in range(count):
            #if objs[i].id != 0 and objs[i].id != 1:
            #    continue
            #if objs[i].score > 1:
            #    continue
            obj_bbox = list(objs[i].bbox)
            #if any(edge > 1 for edge in obj_bbox):
            #    continue
            #if any(np.isnan(edge) for edge in obj_bbox):
            #    continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = int(xmin*width), int(ymin*height), int(xmax*width), int(ymax*height)
            unnorm = [xmin, ymin, xmax, ymax]
            #print(xmin, ymin, xmax, ymax)
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            color = (255, 0, 0)
            thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)

            if objs[i].id == 0:
                label = "nomask"
            elif objs[i].id == 1:
                label = "mask"
            else:
                continue
            score = objs[i].score
            #print(obj_bbox, label, score)

            with open("./mAP/1NN_CPU_8bit_detections/{}.txt".format(filenum), "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

        window_name = 'image'
        #cv2.imshow(window_name, cv2_im)
        #cv2.waitKey()

    avg_mask = total_maskdetection_time/mask_detection_count
    print('Average Total Inference Time: ', avg_mask)
Example #26
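# Runs face detection over a directory of frames and writes one CSV row per detected face (timestamp, pixel bounding box, score).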
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open('face_output.csv', 'w')
    with f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        for image_path in sorted(
                glob.glob('/home/mendel/dataset/Store/frames/Camera01/*.jpg')):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter,
                              score_threshold=args.threshold,
                              top_k=args.top_k)
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                    x1 * width), int(y1 * height)
                score = obj.score
                label = 'face'
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })
Example #27
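# Vehicle/person detection with a CPU TF Lite model plus a forward-distance estimator; boxes and estimated distances are drawn on each frame, and a dated CSV is opened for logging.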
def main():
    default_model_dir = '/Users/octavian/Projects/Python3_projects/cars-counting/all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    # interpreter = tflite.Interpreter(args.model, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    interpreter = tf.lite.Interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)
    detection_threshold = 0.5

    dist_estimator = ForwardDistanceEstimator()
    dist_estimator.load_scalers('./extra/scaler_x.save',
                                './extra/scaler_y.save')
    dist_estimator.load_model(
        '/Users/octavian/Projects/Python3_projects/cars-counting/all_models/[email protected]',
        '/Users/octavian/Projects/Python3_projects/cars-counting/all_models/[email protected]'
    )

    frames_until_reset = 0
    csv_columns = ["Number", "Type", "Date"]

    cap = cv2.VideoCapture(0)
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    # out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (640,352))

    ct = CentroidTracker()
    with open(
            "output_" + datetime.datetime.today().strftime('%Y-%m-%d') +
            ".csv", "w") as output_file:
        writer = csv.DictWriter(output_file, fieldnames=csv_columns)
        writer.writeheader()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            cv2_im = frame
            frames_until_reset += 1

            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)

            (h, w) = cv2_im.shape[:2]
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs, boxes, classes, scores, count = get_output(
                interpreter, score_threshold=args.threshold, top_k=args.top_k)
            boxes = np.squeeze(boxes)
            classes = np.squeeze(classes).astype(np.int32)
            scores = np.squeeze(scores)

            for ind in range(len(boxes)):
                if scores[ind] > detection_threshold and (
                        classes[ind] == 2 or classes[ind] == 7
                        or classes[ind] == 3 or classes[ind] == 0):

                    box = boxes[ind] * np.array([h, w, h, w])
                    box = np.append(box, classes[ind])

                    (startY, startX, endY, endX, label) = box.astype("int")
                    distance = dist_estimator.predict_distance(
                        startX, startY, endX, endY)
                    cv2.putText(img=cv2_im,
                                text=str(distance),
                                org=(startX + 30, startY + 30),
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=1e-3 * frame.shape[0],
                                color=(255, 255, 255),
                                thickness=2)
                    cv2.rectangle(cv2_im, (startX, startY), (endX, endY),
                                  (0, 255, 0), 2)

            cv2.imshow('Output', cv2_im)
            cv2.waitKey(1)

            # out.write(frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        # out.release()
        cv2.destroyAllWindows()
Example #28
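# Combines the object-detection loop with facial-landmark drowsiness monitoring: a low eye aspect ratio triggers an alarm thread and yawns are counted via the mouth aspect ratio.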
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(1)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = imutils.resize(frame, width=640)
        gray = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2GRAY)
        prev_yawn_status = yawnStatus
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        rects = detector(gray, 0)
        for rect in rects:
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            mouth = shape[mStart:mEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            mouEAR = mouth_aspect_ratio(mouth)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouthHull = cv2.convexHull(mouth)
            if ear < EYE_AR_THRESH:
                COUNTER += 1
                cv2.putText(cv2_im, "Eyes Closed ", (10, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    cv2.putText(cv2_im, "DROWSINESS ALERT!", (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    if not ALARM_ON:
                        ALARM_ON = True
                        threadStatusQ.put(not ALARM_ON)
                        thread = Thread(target=soundAlert, args=(sound_path, threadStatusQ,))
                        thread.daemon = True
                        thread.start()
                else:
                    ALARM_ON=False
            else:
                COUNTER = 0
                cv2.putText(cv2_im, "Eyes Open ", (10, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            if mouEAR > MOU_AR_THRESH:
                cv2.putText(cv2_im, "Yawning ", (10, 70),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),2)
                yawnStatus = True
                output_text = "Yawn Count: " + str(yawns + 1)
                cv2.putText(cv2_im, output_text, (10,100),cv2.FONT_HERSHEY_SIMPLEX, 0.7,(255,0,0),2)
            else:
                yawnStatus = False
            # A completed yawn is counted on the open -> closed transition.
            if prev_yawn_status and not yawnStatus:
                yawns += 1
        
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
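Example #28 relies on several helpers and module-level names that are not shown above: the eye/mouth aspect-ratio functions, the dlib detector and predictor, the landmark index slices and the alarm plumbing. A minimal sketch of what those pieces might look like is given below; the threshold values, file paths and exact landmark indices are assumptions rather than the original author's settings.

# Sketch of the helpers assumed by Example #28 (values and paths are assumptions).
from queue import Queue

import dlib
from imutils import face_utils
from playsound import playsound
from scipy.spatial import distance as dist

EYE_AR_THRESH = 0.25        # eyes count as "closed" below this aspect ratio (assumed)
EYE_AR_CONSEC_FRAMES = 20   # consecutive closed frames before the alarm fires (assumed)
MOU_AR_THRESH = 0.75        # mouth aspect ratio above which a yawn is assumed
sound_path = 'alarm.wav'    # assumed alert sound file
threadStatusQ = Queue()

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')  # assumed path
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS['left_eye']
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS['right_eye']
(mStart, mEnd) = face_utils.FACIAL_LANDMARKS_IDXS['mouth']

def eye_aspect_ratio(eye):
    # Ratio of the vertical eye-landmark distances to the horizontal one;
    # it drops towards zero as the eye closes.
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    C = dist.euclidean(eye[0], eye[3])
    return (A + B) / (2.0 * C)

def mouth_aspect_ratio(mouth):
    # Same idea for the mouth: large values indicate a wide-open (yawning) mouth.
    A = dist.euclidean(mouth[2], mouth[10])
    B = dist.euclidean(mouth[4], mouth[8])
    C = dist.euclidean(mouth[0], mouth[6])
    return (A + B) / (2.0 * C)

def soundAlert(path, status_queue):
    # Plays the alert sound until True is read from the queue.
    while True:
        if not status_queue.empty() and status_queue.get():
            break
        playsound(path)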
Example #29
0
def main():
    default_model_dir = './all_models'
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'  
    
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default = default_model)
                        #default=os.path.join(default_model_dir,default_model))                        
    
    
    #################### Keondo's Modification #########################
    default_model2 = 'mask_detector_quant.tflite'
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    parser.add_argument('--model2', help='.tflite model path',
                        default=default_model2)
    #################### Keondo's Modification #########################    
    
    parser.add_argument('--labels', help='label file path',
                        default = default_labels)
                        #default=os.path.join(default_model_dir, default_labels))

    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, help='Index of the video source to use.', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()
    
    #Initialize and configure pyttsx3 for warning messages    
    #engine = pyttsx3.init()
    #rate = engine.getProperty('rate')
    #engine.setProperty('rate', rate - 50)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    #interpreter = common.make_interpreter(args.model)
    interpreter = tflite.Interpreter(model_path = args.model)
    interpreter.allocate_tensors()
    
    #################### Keondo's Modification #########################
    #interpreter2 = common.make_interpreter(args.model2)
    interpreter2 = tflite.Interpreter(model_path = args.model2)
    interpreter2.allocate_tensors()
    print('Interpreter 2 loaded')
    #################### Keondo's Modification #########################  
    
    
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    frame_no = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        
        #################### Keondo's Modification #########################
        #print('Interpreter 2 processing start')        
        #pil_im2 = pil_im.resize((224,224), resample=Image.NEAREST)
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #pil_im2 = np.expand_dims(pil_im2, axis=0)
                
        height, width, channels = cv2_im.shape
        
        noMaskCount = 0
        
        mask_data = []
        
        i = 0
        
        # Resize the full frame to the mask model's 224x224 input, build a batch
        # of shape (1, 224, 224, 3) in the input tensor's dtype, and only then
        # invoke the interpreter and read its output.
        pil_im2 = pil_im.resize((224, 224), resample=Image.NEAREST)
        input_details = interpreter2.get_input_details()[0]
        input_data = np.expand_dims(
            np.asarray(pil_im2, dtype=input_details['dtype']), axis=0)
        interpreter2.set_tensor(input_details['index'], input_data)
        interpreter2.invoke()

        output_details = interpreter2.get_output_details()[0]
        output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        
        
        """
        #for obj in objs:
        for i in range(len(objs)-1,-1, -1):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)                        
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])
            print("Bf NN: ", frame_no, i, x0, y0)
            common.set_input2(interpreter2, pil_im2)
            output_data = common.output_tensor2(interpreter2)
            interpreter2.invoke()       
            print("Af NN: ", frame_no, i, x0, y0)
            print("Output data: ", output_data)
            mask_data.append((len(objs) - 1 - i, output_data))   
            #qi += 1
        
        j = 0
        
        #for obj in objs:
        for j in range(len(objs)):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)           
            
            print("2nd loop: ", frame_no, j, x0, y0)            
            print(list(filter(lambda x: x[0] == j, mask_data)))
            
            output = list(filter(lambda x: x[0] == j, mask_data))     
            
            mask, withoutMask = output[0][1]
                        
            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")" 
                color = (255, 0, 0) #blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"   
                color = (0, 0, 255) #red
                noMaskCount += 1
            
            labelMask = "{}: {:.2f}%".format(labelMask, max(mask, withoutMask) * 100) 
            
            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)        
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0-10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
            #j += 1
        """    
        frame_no += 1
        #if noMaskCount > 0:
        #    engine.say("There are " + str(noMaskCount) + "people not wearing masks. Please wear a mask")
            
        #tensor_index = interpreter2.get_input_details()[0]['index']
        #set_input2 = interpreter2.tensor(tensor_index)()
        #input_tensor2(interpreter2)[:,:] = pil_im2
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #set_input2(pil_im2)
        #interpreter2.set_tensor(tensor_index, pil_im2)
        
        
        
        #output_details = interpreter2.get_output_details()[0]
        #output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        
        
        """
        There is at least 1 reference to internal data
      in the interpreter in the form of a numpy array or slice. Be sure to
      only hold the function returned from tensor() if you are using raw
      data access.
      """
        
        
        #print('Interpreter 2 Output data')
        #print(output_data)
        #if 'quantization' in output_details:
        #    print('quantization')
        #    print(output_details['quantization'])
        #elif 'quantization_parameters' in output_details:
        #    print('quantization_parameters')
        #    print(output_details['quantization_parameters'])
        #else:
        #    print('No quantization')
            
        #scales, zero_points, quantized_dimension = output_details['quantization_parameters']
        #if scales == 0:
        #    objs2 = output_data - zero_points
        #else:
        #    objs2 = scales * (output_data - zero_points)        
        
        #print('Check objs2')
        #print(objs2)
        

        #################### Keondo's Modification #########################    
        

        cv2.imshow('frame', cv2_im)
        #engine.runAndWait()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
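The block that Example #29 keeps commented out is working towards running the second (mask) model on each detected face crop instead of on the whole frame. Below is a minimal sketch of that per-crop flow using the plain tflite.Interpreter API; classify_mask is a hypothetical helper, and it assumes the mask model takes a 224x224 image and returns two scores ordered (mask, without_mask).

import numpy as np
from PIL import Image

def classify_mask(interpreter2, cv2_im_rgb, bbox, width, height):
    # Classify one detected face crop with the (possibly quantized) mask model.
    in_d = interpreter2.get_input_details()[0]
    out_d = interpreter2.get_output_details()[0]

    # Scale the normalized bbox to pixels and crop the face region.
    x0, y0, x1, y1 = (max(0, int(v * s))
                      for v, s in zip(bbox, (width, height, width, height)))
    crop = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1]).resize((224, 224), Image.NEAREST)

    # Feed a batch of shape (1, 224, 224, 3) in the input tensor's dtype, then invoke.
    interpreter2.set_tensor(in_d['index'],
                            np.expand_dims(np.asarray(crop, dtype=in_d['dtype']), axis=0))
    interpreter2.invoke()
    raw = np.squeeze(interpreter2.get_tensor(out_d['index']))

    # Dequantize when the output carries quantization parameters (scale == 0 means none).
    scale, zero_point = out_d['quantization']
    scores = raw.astype(np.float32) if scale == 0 else scale * (raw.astype(np.float32) - zero_point)
    mask, without_mask = scores
    return mask, without_mask

Called once per obj in objs inside the main loop, this gives a per-face mask/no-mask score that can be drawn with the same rectangle/putText pattern used in the disabled block.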
Example #30
0
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=10,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.3,
                        help='classifier score threshold')
    parser.add_argument('--class_ids',
                        nargs='*',
                        type=int,
                        default=0,
                        help='Array of class id')
    parser.add_argument('--input_files',
                        default='/home/mendel/dataset/*.jpg',
                        help='Input files')
    parser.add_argument('--csv_out',
                        default='detect_output.csv',
                        help='csv output file')
    args = parser.parse_args()
    if args.class_ids == 0:
        args.class_ids = [0]

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # CSV writer (newline='' prevents blank rows on Windows)
    with open(args.csv_out, 'w', newline='') as f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        inference_time = []
        for image_path in sorted(glob.glob(args.input_files)):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            # inference
            start = time.time()
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter,
                              score_threshold=args.threshold,
                              top_k=args.top_k,
                              class_list=args.class_ids)
            inference_time.append(time.time() - start)

            # return results
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                    x1 * width), int(y1 * height)
                score = obj.score
                label = labels.get(obj.id, obj.id)
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })

        print("Inference time : {:.3f} ms".format(
            sum(inference_time) * 1000 / len(inference_time)))
        print("Frames per second : {:.2f} fps".format(
            len(inference_time) / sum(inference_time)))
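As an illustration of the CSV schema Example #30 writes (this snippet is not part of the original script), the file can be read back with csv.DictReader and summarised per label:

# Illustrative only: count detections per label in the CSV written by Example #30.
import csv
from collections import Counter

counts = Counter()
with open('detect_output.csv', newline='') as f:   # default --csv_out name
    for row in csv.DictReader(f):
        counts[row['label']] += 1

for label, n in counts.most_common():
    print('{:<20} {}'.format(label, n))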