Example #1
    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(face_interpreter, input_tensor)
        face_interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(face_interpreter, args.threshold, args.top_k)
        # Get face detected part
        from PIL import Image
        im = Image.fromarray(common.input_tensor(face_interpreter))
        src_w, src_h = src_size
        inf_w, inf_h = inference_size
        results = []
        emo_objs = []
        for obj in objs:
            x0, y0, x1, y1 = list(obj.bbox)
            # Relative coordinates.
            x, y, w, h = x0, y0, x1 - x0, y1 - y0
            # Absolute coordinates, input tensor space.
            x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(
                h * inf_h)
            crop_rectangle = (x, y, x + w, y + h)
            # get face
            face = im.crop(crop_rectangle)
            face = np.array(face)
            # convert to grayscale
            #face = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)
            print(face.shape)
            # Resize the crop to the 224x224 RGB uint8 input expected by the FER model.
            face = cv2.resize(face, (224, 224))
            face = face.astype(np.uint8)
            #face /= float(face.max())
            # invoke fer interpreter
            common.set_input2(fer_interpreter, face)
            fer_interpreter.invoke()
            # process results
            results = get_emotion(fer_interpreter)
            if len(results) > 0:
                setattr(obj, "id", results[0].id)
                setattr(obj, "score", results[0].score)
                emo_objs.append(obj)
        objs = emo_objs
        end_time = time.monotonic()

        text_lines = []
        if len(objs) > 0:
            text_lines = [
                'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
                'FPS: {} fps'.format(round(next(fps_counter))),
            ]
            for result in results:
                text_lines.append('score={:.2f}: {}'.format(
                    result.score, labels.get(result.id, result.id)))
            #print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines)
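
get_emotion is not shown in this snippet. A minimal sketch of what it could look like, assuming the FER model exposes a single (possibly uint8-quantized) classification output and a simple Class(id, score) record; both assumptions, not the original helper:

# Hypothetical helper (not part of the original code): read the FER
# classifier output, dequantize if needed, and return the top classes.
import collections
import numpy as np

Class = collections.namedtuple('Class', ['id', 'score'])

def get_emotion(interpreter, top_k=1):
    details = interpreter.get_output_details()[0]
    scores = np.squeeze(interpreter.get_tensor(details['index']))
    scale, zero_point = details['quantization']
    if scale:  # dequantize uint8 outputs to float scores
        scores = scale * (scores.astype(np.float32) - zero_point)
    order = np.argsort(scores)[::-1][:top_k]
    return [Class(int(i), float(scores[i])) for i in order]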
Example #2
 def user_callback(input_tensor, src_size, inference_box):
   nonlocal fps_counter
   start_time = time.monotonic()
   common.set_input(interpreter, input_tensor)
   interpreter.invoke()
   # For larger input image sizes, use the edgetpu.classification.engine for better performance
   objs = get_output(interpreter, args.threshold, args.top_k)
   # Get face detected part
   from PIL import Image
   im = Image.fromarray(common.input_tensor(interpreter))
   src_w, src_h = src_size
   inf_w, inf_h = inference_size
   results = []
   for obj in objs:
     x0, y0, x1, y1 = list(obj.bbox)
     # Relative coordinates.
     x, y, w, h = x0, y0, x1 - x0, y1 - y0
     # Absolute coordinates, input tensor space.
     x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(h * inf_h)
     crop_rectangle = (x, y, x+w, y+h)
     face_part = im.crop(crop_rectangle)
     # invoke fer interpreter
     common.set_input2(interpreter_fer, face_part)
     interpreter_fer.invoke()
     results = get_output2(interpreter_fer, args.top_k, args.threshold)
     if len(results) > 0:
         setattr(obj, "id", results[0].id)
         setattr(obj, "score", results[0].score)
   
   end_time = time.monotonic()
   text_lines = []
   text_lines = [
       'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
       'FPS: {} fps'.format(round(next(fps_counter))),
   ]
   for result in results:
       text_lines.append('score={:.2f}: {}'.format(result.score, labels.get(result.id, result.id)))
   print(' '.join(text_lines))
   return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)
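
common.set_input2, used by both callbacks above, is not part of the snippet either. A minimal sketch of what it might do, assuming the second interpreter takes a single (1, h, w, 3) image tensor and is fed a PIL image as in Example #2:

# Hypothetical version of common.set_input2 (assumption, not the original):
# resize a PIL image to the classifier's input size and copy it in.
import numpy as np
from PIL import Image

def set_input2(interpreter, image):
    details = interpreter.get_input_details()[0]
    _, height, width, _ = details['shape']
    image = image.convert('RGB').resize((width, height), Image.LANCZOS)
    tensor = np.expand_dims(np.asarray(image, dtype=details['dtype']), 0)
    interpreter.set_tensor(details['index'], tensor)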
Example #3
def main():
    #default_model_dir = './all_models'

    # Set face detection model
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite' # Coral ver
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite' # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=default_model)

    # Set mask classification model
    default_model2 = 'mask_detector_quant.tflite'  # GPU ver
    #default_model2 = 'mask_detector_quant_edgetpu.tflite' #Coral ver
    parser.add_argument('--model2',
                        help='.tflite model path',
                        default=default_model2)

    parser.add_argument('--labels',
                        help='label file path',
                        default=default_labels)

    #parser.add_argument('--top_k', type=int, default=3,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    # Load 2NN
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)

    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    filenames = os.listdir(test_dir)
    full_filenames = []
    for filename in filenames:
        full_filename = os.path.join(test_dir, filename)
        full_filenames.append(full_filename)

    total_facedetection_time = 0
    face_detection_count = 0

    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        #print(f'---------------------------', filename, '---------------------------')
        # get filenum
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]

        # set root from xml
        tree = ET.parse(filename)
        root = tree.getroot()

        # find img directory
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)

        # Load Image, get height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Get ground truths
        objects = root.findall('object')
        ground_truths = []
        for obj in objects:
            # get name, bndbox for labels and bbox
            name = obj.find('name')
            bndbox = obj.find('bndbox')

            # set test label to name.text (mask or nomask)
            test_label = name.text
            bbox = []
            for element in bndbox:
                bbox.append(int(element.text))
            xmin, ymin, xmax, ymax = bbox
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (0, 0, 255)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)
            test_bbox = [
                bbox[0] / width, bbox[1] / height, bbox[2] / width,
                bbox[3] / height
            ]

            ground_truths.append([test_label, test_bbox])

        #print('ground_truths: ', ground_truths)

        for ground_truth in ground_truths:
            with open("./mAP/groundtruths/{}.txt".format(filenum),
                      "a+") as file:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)

        # Latency calculation
        detect_start_time = time.time()
        interpreter.invoke()
        detect_end_time = time.time()
        total_facedetection_time += detect_end_time - detect_start_time
        face_detection_count += 1

        objs = get_output(
            interpreter)  #score_threshold=args.threshold, top_k=args.top_k)
        #print('detection result:', objs)

        for i in range(len(objs)):
            if objs[i].id != 0:
                continue
            if objs[i].score > 1:
                continue
            obj_bbox = list(objs[i].bbox)
            if any(edge > 1 for edge in obj_bbox):
                continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = int(xmin * width), int(
                ymin * height), int(xmax * width), int(ymax * height)
            unnorm = [xmin, ymin, xmax, ymax]
            top_left, bottom_right = (xmin, ymax), (xmax, ymin)
            #color = (255, 0, 0)
            #thickness = 2
            #cv2.rectangle(cv2_im, top_left, bottom_right, color, thickness)

            pil_im2 = Image.fromarray(cv2_im_rgb[ymin:ymax, xmin:xmax])
            common.set_input2(interpreter2, pil_im2)

            # Latency calculation
            mask_start_time = time.time()
            interpreter2.invoke()
            mask_end_time = time.time()
            output_data = common.output_tensor2(interpreter2)

            total_maskdetection_time += mask_end_time - mask_start_time
            mask_detection_count += 1

            # print(output_data)
            mask = output_data[0]
            withoutMask = output_data[1]
            print('mask_percentage: ', mask, ', nomask_percentage: ',
                  withoutMask)

            if mask > withoutMask:
                label = "mask"
                score = mask * objs[i].score
            else:
                label = "nomask"
                score = withoutMask * objs[i].score
            #print(obj_bbox, label, score)

            with open("./mAP/2NN_CPU_8bit_detections/{}.txt".format(filenum),
                      "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

        #window_name = 'Image'
        #cv2.imshow(window_name, cv2_im)
        #cv2.waitKey()

        #print('-------------------------------next file----------------------------------------------------------')

    avg_face = total_facedetection_time / face_detection_count
    avg_mask = total_maskdetection_time / mask_detection_count
    print('Average Face Detection Time: ', avg_face)
    print('Average Mask Detection Time: ', avg_mask)
    print('Average Total Inference Time: ', avg_face + avg_mask)
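
The ground-truth and detection files written above feed an external mAP script (the ./mAP directory). Such scripts pair detections with ground-truth boxes by intersection-over-union; a sketch of that check is below, with the usual 0.5 threshold stated as an assumption, and both boxes expected in the same coordinate system:

# IoU check a typical mAP evaluator applies when pairing a detection with a
# ground-truth box; boxes are [xmin, ymin, xmax, ymax] in the same units.
def iou(box_a, box_b):
    xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, xb - xa) * max(0, yb - ya)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter) if inter else 0.0

# A detection usually counts as a true positive when iou(det, gt) >= 0.5
# and the labels match.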
Example #4
def main():
    #default_model_dir = './all_models'

    # Set face detection model
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite' # Coral ver
    # default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite' # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=default_model)

    # Set mask classification model
    #default_model2 = 'mask_detector_quant.tflite' # GPU ver
    default_model2 = 'mask_detector_quant_v2_edgetpu.tflite'  #Coral ver
    parser.add_argument('--model2',
                        help='.tflite model path',
                        default=default_model2)

    parser.add_argument('--labels',
                        help='label file path',
                        default=default_labels)

    args = parser.parse_args()

    # Load 1NN
    #interpreter = tflite.Interpreter(model_path = args.model)
    #interpreter.allocate_tensors()

    # Load 2NN
    interpreter2 = tflite.Interpreter(
        model_path=args.model2,
        experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])
    interpreter2.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)

    # Load Test Data - ground truth, image
    test_dir = 'for_evaluation(test_set)/xml'

    #test_img_dir = 'for_evaluation(2NN)/wo_mask'
    test_img_dir = 'for_evaluation(2NN)'
    #test_img_dir = 'temp'
    filenames = os.listdir(test_img_dir)
    full_filenames = []

    for folder in filenames:
        filenames2 = os.listdir(os.path.join(test_img_dir, folder))
        full_folder = os.path.join(test_img_dir, folder)
        for filename in filenames2:
            full_filename = os.path.join(full_folder, filename)
            full_filenames.append(full_filename)

    #for filename in filenames:
    #    full_filename = os.path.join(test_img_dir, filename)
    #    full_filenames.append(full_filename)

    total_facedetection_time = 0
    face_detection_count = 0

    total_maskdetection_time = 0
    mask_detection_count = 0

    correct_mask_classification_count = 0

    #random.shuffle(full_filenames)

    for filename in full_filenames:
        print('---------------------------', filename,
              '---------------------------')
        # get filenum
        filenum = filename[-9:-4]

        image_path = filename

        # Load Image, get height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Evaluation of object detection
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input2(interpreter2, pil_im)

        # Latency calculation
        mask_start_time = time.time()
        interpreter2.invoke()
        mask_end_time = time.time()

        output_data = common.output_tensor2(interpreter2)

        total_maskdetection_time += mask_end_time - mask_start_time
        mask_detection_count += 1

        mask = output_data[0]
        withoutMask = output_data[1]
        print('mask_percentage: ', mask, ', nomask_percentage: ', withoutMask)

        if mask > withoutMask:
            label = "mask"
            score = mask
        else:
            label = "nomask"
            score = withoutMask

        gt = ''

        filesplit = filename.split('/')
        print(filesplit[-2])
        if filesplit[-2] == 'w_mask':
            gt = 'mask'
        else:
            gt = 'nomask'

        if label == gt:
            print("Correct classification")
            correct_mask_classification_count += 1
        else:
            print("NOT correct classification")

        #if mask_detection_count > 100:
        #    break

    print("Total mask detection count: ", mask_detection_count)
    print("Correct mask classification count: ",
          correct_mask_classification_count)
    print("Accuracy: ",
          correct_mask_classification_count / mask_detection_count)
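
common.output_tensor2 is read throughout these examples as a two-element [mask, nomask] vector. The helper itself is not included, so the following is only a guess at its shape, assuming a single (possibly uint8-quantized) output tensor:

# Hypothetical common.output_tensor2 (assumption): return the second
# interpreter's output as a flat float vector, e.g. [mask, nomask].
import numpy as np

def output_tensor2(interpreter):
    details = interpreter.get_output_details()[0]
    data = np.squeeze(interpreter.get_tensor(details['index']))
    scale, zero_point = details['quantization']
    if scale:
        data = scale * (data.astype(np.float32) - zero_point)
    return data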
Example #5
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'fer_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default='../all_models/fer_detect_edgetpu.tflite')
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=10,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use.',
                        default=0)
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    # face interpreter
    interpreter_face = common.make_interpreter(
        os.path.join(default_model_dir, default_model))
    interpreter_face.allocate_tensors()
    # fer interpreter
    interpreter_fer = common.make_interpreter(args.model)
    interpreter_fer.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter_face)
    inference_size = (w, h)

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter_face, pil_im)
        interpreter_face.invoke()
        objs = get_output(interpreter_face, args.threshold, args.top_k)
        # Get face detected part
        inf_w, inf_h = inference_size
        results = []
        for obj in objs:
            x0, y0, x1, y1 = list(obj.bbox)
            # Relative coordinates.
            x, y, w, h = x0, y0, x1 - x0, y1 - y0
            # Absolute coordinates, input tensor space.
            x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(
                h * inf_h)
            crop_rectangle = (x, y, x + w, y + h)
            face_part = pil_im.crop(crop_rectangle)
            # invoke fer interpreter
            face_part = np.array(face_part)
            face_part = cv2.cvtColor(face_part, cv2.COLOR_RGB2GRAY)
            face_part = Image.fromarray(face_part)
            common.set_input2(interpreter_fer, face_part)
            interpreter_fer.invoke()
            results = get_output2(interpreter_fer, args.top_k, args.threshold)
            if len(results) > 0:
                setattr(obj, "id", results[0].id)
                setattr(obj, "score", results[0].score)
        # show the results (objs now carry the FER id/score set above)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
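
append_objs_to_img is another helper that is not included here. A rough sketch of what it might do, assuming relative bbox coordinates and the id/score attributes assigned in the loop above; the colours and text layout are placeholders:

# Hypothetical append_objs_to_img (assumption): draw each detection's box and
# its (re-labelled) class/score on the BGR frame, scaling relative bbox coords.
import cv2

def append_objs_to_img(cv2_im, objs, labels):
    height, width, _ = cv2_im.shape
    for obj in objs:
        x0, y0, x1, y1 = list(obj.bbox)
        x0, y0 = int(x0 * width), int(y0 * height)
        x1, y1 = int(x1 * width), int(y1 * height)
        text = '{:.2f} {}'.format(obj.score, labels.get(obj.id, obj.id))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, text, (x0, y0 + 20),
                             cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return cv2_im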
Example #6
def main():
    default_model_dir = './all_models'
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=default_model)
    #default=os.path.join(default_model_dir,default_model))

    #################### Keondo's Modification #########################
    default_model2 = 'mask_detector_quant.tflite'
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    parser.add_argument('--model2',
                        help='.tflite model path',
                        default=default_model2)
    #################### Keondo's Modification #########################

    parser.add_argument('--labels',
                        help='label file path',
                        default=default_labels)
    #default=os.path.join(default_model_dir, default_labels))

    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    #Initialize and configure pyttsx3 for warning messages
    #engine = pyttsx3.init()
    #rate = engine.getProperty('rate')
    #engine.setProperty('rate', rate - 50)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    #interpreter = common.make_interpreter(args.model)
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    #################### Keondo's Modification #########################
    #interpreter2 = common.make_interpreter(args.model2)
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()
    print('Interpreter 2 loaded')
    #################### Keondo's Modification #########################

    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    frame_no = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        #cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #################### Keondo's Modification #########################
        #print('Interpreter 2 processing start')
        #pil_im2 = pil_im.resize((224,224), resample=Image.NEAREST)
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #pil_im2 = np.expand_dims(pil_im2, axis=0)

        height, width, channels = cv2_im.shape

        noMaskCount = 0

        mask_data = []

        i = 0

        #for obj in objs:
        for i in range(len(objs) - 1, -1, -1):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                x1 * width), int(y1 * height)
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])
            print("Bf NN: ", frame_no, i, x0, y0)
            common.set_input2(interpreter2, pil_im2)
            # Run inference before reading the output tensor.
            interpreter2.invoke()
            output_data = common.output_tensor2(interpreter2)
            print("Af NN: ", frame_no, i, x0, y0)
            print("Output data: ", output_data)
            # Key mask_data by the detection index so the second loop can match it.
            mask_data.append((i, output_data))
            #qi += 1

        j = 0

        #for obj in objs:
        for j in range(len(objs)):
            #x0, y0, x1, y1 = list(obj.bbox)
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                x1 * width), int(y1 * height)

            print("2nd loop: ", frame_no, j, x0, y0)
            print(list(filter(lambda x: x[0] == j, mask_data)))

            output = list(filter(lambda x: x[0] == j, mask_data))

            mask, withoutMask = output[0][1]

            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")"
                color = (255, 0, 0)  #blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"
                color = (0, 0, 255)  #red
                noMaskCount += 1

            labelMask = "{}: {:.2f}%".format(labelMask,
                                             max(mask, withoutMask) * 100)

            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0 - 10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
            #j += 1

        frame_no += 1
        #if noMaskCount > 0:
        #    engine.say("There are " + str(noMaskCount) + "people not wearing masks. Please wear a mask")

        #tensor_index = interpreter2.get_input_details()[0]['index']
        #set_input2 = interpreter2.tensor(tensor_index)()
        #input_tensor2(interpreter2)[:,:] = pil_im2
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #set_input2(pil_im2)
        #interpreter2.set_tensor(tensor_index, pil_im2)

        #output_details = interpreter2.get_output_details()[0]
        #output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        """
        There is at least 1 reference to internal data
      in the interpreter in the form of a numpy array or slice. Be sure to
      only hold the function returned from tensor() if you are using raw
      data access.
      """

        #print('Interpreter 2 Output data')
        #print(output_data)
        #if 'quantization' in output_details:
        #    print('quantization')
        #    print(output_details['quantization'])
        #elif 'quantization_parameters' in output_details:
        #    print('quantization_parameters')
        #    print(output_details['quantization_parameters'])
        #else:
        #    print('No quantization')

        #scales, zero_points, quantized_dimension = output_details['quantization_parameters']
        #if scales == 0:
        #    objs2 = output_data - zero_points
        #else:
        #    objs2 = scales * (output_data - zero_points)

        #print('Check objs2')
        #print(objs2)

        #################### Keondo's Modification #########################

        cv2.imshow('frame', cv2_im)
        #engine.runAndWait()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
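
For reference, the common.set_input2 / common.output_tensor2 helpers wrap the plain tflite_runtime calling sequence, in which an output tensor is only valid after invoke() returns. A minimal single-inference sketch; the input array here is a stand-in, and the model path is the example's default second model:

# Minimal set-input -> invoke -> read-output sequence with the raw
# tflite_runtime API; input is assumed to be a (1, h, w, 3) uint8 tensor.
import numpy as np
import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter(model_path='mask_detector_quant.tflite')
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]

image = np.zeros(inp['shape'], dtype=inp['dtype'])   # stand-in input
interpreter.set_tensor(inp['index'], image)
interpreter.invoke()                                  # run inference first
output_data = interpreter.get_tensor(out['index'])    # then read the result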
Example #7
def main():
    default_model_dir = './all_models'
    
    #### In order to run on Laptop, tflite file before edgetpu compile should be used ###
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'  #wc amend
    #default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'    
        
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    default_model2 = 'mask_detector_quant_v2_edgetpu.tflite'    #wc amend
    #####################################################################################
    
    default_labels = 'coco_labels.txt'      
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default = default_model)
                        #default=os.path.join(default_model_dir,default_model))                        

    parser.add_argument('--model2', help='.tflite model path',
                        default=default_model2)       
    
    parser.add_argument('--labels', help='label file path',
                        default = default_labels)
                        #default=os.path.join(default_model_dir, default_labels))

    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()
         

    print('Loading {} with {} labels.'.format(args.model, args.labels))
        
    ### Some functions in common.make_interpreter needs Edge TPU ########################
    ### Simply use tflite.Interpreter method on laptop
    interpreter = common.make_interpreter(args.model)
    #interpreter = tflite.Interpreter(model_path = args.model)
    interpreter.allocate_tensors()
        
    interpreter2 = common.make_interpreter(args.model2)
    #interpreter2 = tflite.Interpreter(model_path = args.model2)
    #interpreter2 = tflite.Interpreter(model_path = args.model2, experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')]) # wc amend
    interpreter2.allocate_tensors()
    #####################################################################################
    
    print('Interpreter 2 loaded')    
    
    labels = load_labels(args.labels)
    cap = cv2.VideoCapture(args.camera_idx)
    
    #Initialize and configure pygame for warning messages    
    pygame.init()
    beep = pygame.mixer.Sound("coral.wav")
    beep_switch = False
    
    frame_no = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
                
        height, width, channels = cv2_im.shape
        
        noMaskCount = 0
        
        mask_data = []
        
        i = 0
        
        for i in range(len(objs)):            
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)                        
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])            
            common.set_input2(interpreter2, pil_im2)

            interpreter2.invoke()                   
            
            output_data = common.output_tensor2(interpreter2)
            # Key mask_data by the detection index so the lookup below matches objs[j].
            mask_data.append((i, output_data))
        
        j = 0
                
        for j in range(len(objs)):            
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = int(x0*width), int(y0*height), int(x1*width), int(y1*height)                       
            output = list(filter(lambda x: x[0] == j, mask_data))     
            
            mask, withoutMask = output[0][1]
                        
            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")" 
                color = (255, 0, 0) #blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"   
                color = (0, 0, 255) #red
                noMaskCount += 1
            
            labelMask = "{}: {:.2f}%".format(labelMask, max(mask, withoutMask) * 100) 
            
            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)        
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0-10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
            
        frame_no += 1        
        
        if noMaskCount > 0 and frame_no % 10 == 0 and frame_no > 0:
            beep.play()      
        
        
        """ Below code triggers an error
        
        #tensor_index = interpreter2.get_input_details()[0]['index']
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2        
        
        #output_details = interpreter2.get_output_details()[0]
        #output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        
        
            There is at least 1 reference to internal data
          in the interpreter in the form of a numpy array or slice. Be sure to
          only hold the function returned from tensor() if you are using raw
          data access.
       """      
   
        

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
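
common.make_interpreter in this example requires the Edge TPU runtime. A sketch of an equivalent constructor built directly on tflite_runtime, mirroring the delegate loading shown in Example #4; the CPU fallback is an addition here, not necessarily part of the original helper:

# Build an interpreter with the Edge TPU delegate when available, otherwise
# fall back to plain CPU execution (the fallback behaviour is an assumption).
import tflite_runtime.interpreter as tflite

def make_interpreter(model_path, use_edgetpu=True):
    if use_edgetpu:
        try:
            return tflite.Interpreter(
                model_path=model_path,
                experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])
        except (ValueError, OSError):
            pass  # Edge TPU runtime not present; fall back to CPU
    return tflite.Interpreter(model_path=model_path)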