Example No. 1
    def __init__(self, config):
        self.config = config
        self.detector = None
        self.classifier_model = None
        self.running_video = False
        self.device = self.config.DEVICE
        if self.device == "x86":
            from libs.detectors.x86.detector import Detector
            from libs.classifiers.x86.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        elif self.device == "EdgeTPU":
            from libs.detectors.edgetpu.detector import Detector
            from libs.classifiers.edgetpu.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        elif self.device == "Jetson":
            from libs.detectors.jetson.detector import Detector
            from libs.classifiers.jetson.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        else:
            raise ValueError('Unsupported device: {}'.format(self.device))

        self.image_size = (self.config.DETECTOR_INPUT_SIZE[0],
                           self.config.DETECTOR_INPUT_SIZE[1], 3)
        self.classifier_img_size = (self.config.CLASSIFIER_INPUT_SIZE,
                                    self.config.CLASSIFIER_INPUT_SIZE, 3)
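
The snippet above is the engine constructor on its own; it reads only three fields from the config. As a minimal sketch, assuming a plain namespace is acceptable wherever Config is expected (the concrete values are invented for illustration):

from types import SimpleNamespace

# Hypothetical stand-in covering only the attributes the constructor reads.
stub_config = SimpleNamespace(
    DEVICE="x86",                    # one of "x86", "EdgeTPU", "Jetson"
    DETECTOR_INPUT_SIZE=(300, 300),  # indexed as [0] and [1] above
    CLASSIFIER_INPUT_SIZE=45,        # edge length of the square classifier input
)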
Example No. 2
# Imports assumed by this script (cv is OpenCV; the import path for Config is a
# guess and may differ in the actual repo).
import os
from argparse import ArgumentParser

import cv2 as cv
import numpy as np

from libs.config_engine import Config


def main():
    """
    Read the input video and process it; the output video is exported to the output_video path,
     which can be set through the input arguments.
    Example: python inference_video.py --config configs/config.json --input_video_path data/video/sample.mov
     --output_video data/videos/output.avi
    """
    parser = ArgumentParser()
    parser.add_argument('--config', type=str, help='json config file path')
    parser.add_argument('--input_video_path', type=str, help='the path of the input video', default='')
    parser.add_argument('--output_video', type=str, help='the name of the output video file',
                        default='face_mask_output.avi')
    args = parser.parse_args()

    config_path = args.config
    cfg = Config(path=config_path)

    if args.input_video_path == '':
        input_path = cfg.APP_VIDEO_PATH
    else:
        input_path = args.input_video_path

    print("INFO: Input video is: ", input_path)
    output_path = args.output_video
    # Derive the directory portion of the output path; it is empty when the
    # file goes to the current working directory.
    output_dir = os.path.dirname(output_path)

    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("INFO: The output video will be exported at: ", output_path)

    detector_input_size = (cfg.DETECTOR_INPUT_SIZE[0], cfg.DETECTOR_INPUT_SIZE[1], 3)
    classifier_img_size = (cfg.CLASSIFIER_INPUT_SIZE, cfg.CLASSIFIER_INPUT_SIZE, 3)

    device = cfg.DEVICE
    output_vidwriter = None

    if device == "x86":
        from libs.detectors.x86.detector import Detector
        from libs.classifiers.x86.classifier import Classifier

    elif device == "EdgeTPU":
        from libs.detectors.edgetpu.detector import Detector
        from libs.classifiers.edgetpu.classifier import Classifier
    else:
        raise ValueError('Unsupported device: {}'.format(device))

    detector = Detector(cfg)
    classifier_model = Classifier(cfg)
    input_cap = cv.VideoCapture(input_path)

    print("INFO: Start inferencing")
    frame_id = 0
    while input_cap.isOpened():
        ret, raw_img = input_cap.read()
        if not ret:
            break
        if output_vidwriter is None:
            # Create the writer lazily, once the first frame reveals the resolution.
            output_vidwriter = cv.VideoWriter(output_path, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24,
                                              (raw_img.shape[1], raw_img.shape[0]))
            height, width = raw_img.shape[:2]

        # Process the same frame that is drawn on; reading a second frame here
        # would skip every other frame and draw boxes on a different image.
        cv_image = raw_img
        if np.shape(cv_image) != ():
            resized_image = cv.resize(cv_image, tuple(detector_input_size[:2]))
            rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB)
            objects_list = detector.inference(rgb_resized_image)
            faces = []
            cordinates = []
            for obj in objects_list:
                if 'bbox' in obj.keys():
                    face_bbox = obj['bbox']  # [ymin, xmin, ymax, xmax]
                    xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]], width)
                    ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]], height)
                    croped_face = cv_image[int(ymin):int(ymax),
                                           int(xmin):int(xmax)]
                    # Resizing input image
                    croped_face = cv.resize(croped_face, tuple(classifier_img_size[:2]))
                    croped_face = cv.cvtColor(croped_face, cv.COLOR_BGR2RGB)
                    # Normalizing input image to [0.0-1.0]
                    croped_face = croped_face / 255.0
                    faces.append(croped_face)
                    cordinates.append([int(xmin), int(ymin), int(xmax), int(ymax)])

            faces = np.array(faces)
            if faces.shape[0] == 0:
                # No face detected in this frame; write it through unchanged.
                output_vidwriter.write(raw_img)
                frame_id = frame_id + 1
                continue

            face_mask_results, scores = classifier_model.inference(faces)
            for i, cor in enumerate(cordinates):
                # Red box for label 1, green for label 0, black otherwise
                # (label semantics depend on how the classifier was trained).
                if face_mask_results[i] == 1:
                    color = (0, 0, 255)
                elif face_mask_results[i] == 0:
                    color = (0, 255, 0)
                else:
                    color = (0, 0, 0)

                cv.rectangle(raw_img, (cor[0], cor[1]), (cor[2], cor[3]), color, 2)
            output_vidwriter.write(raw_img)
            print('Processed frame {} at {} fps'.format(frame_id, detector.fps))
            frame_id = frame_id + 1
        else:
            continue

    input_cap.release()
    output_vidwriter.release()
    print('INFO: Finished :) Output video exported at: ', output_path)
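
The per-frame loop above scales the detector's normalized [ymin, xmin, ymax, xmax] boxes by the frame width and height before cropping (Example No. 3 below does the same). A small worked sketch of that arithmetic, using an invented box and frame size:

import numpy as np

# Hypothetical normalized box from the detector: [ymin, xmin, ymax, xmax]
face_bbox = [0.25, 0.40, 0.75, 0.60]
width, height = 640, 480

xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]], width)   # 256.0, 384.0
ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]], height)  # 120.0, 360.0

# The classifier crop is then frame[120:360, 256:384].
print(int(xmin), int(ymin), int(xmax), int(ymax))  # 256 120 384 360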
Example No. 3
# Imports assumed by this script (cv is OpenCV; the import path for Config is a
# guess and may differ in the actual repo).
import os
from argparse import ArgumentParser

import cv2 as cv
import numpy as np

from libs.config_engine import Config


def main():
    """
    Read the input images and process them; the output images are exported to output_image_dir,
     which can be set through the input arguments.
    Example: python inference_images.py --config configs/config.json --input_image_dir data/images
     --output_image_dir output_images
    """
    parser = ArgumentParser()
    parser.add_argument('--config', type=str, help='json config file path')
    parser.add_argument('--input_image_dir',
                        type=str,
                        help='the directory of input images')
    parser.add_argument('--output_image_dir',
                        type=str,
                        help='the directory of output images',
                        default='output_images')
    args = parser.parse_args()

    config_path = args.config
    cfg = Config(path=config_path)

    input_dir = args.input_image_dir
    print("INFO: The directory of input images is: ", input_dir)
    output_dir = args.output_image_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.isdir(output_dir):
        print('"{}" is not a directory; please create the output directory '
              'before running this script.'.format(output_dir))
        exit(1)

    print("INFO: The output images will be exported at: ", output_dir)

    detector_input_size = (cfg.DETECTOR_INPUT_SIZE[0],
                           cfg.DETECTOR_INPUT_SIZE[1], 3)
    classifier_img_size = (cfg.CLASSIFIER_INPUT_SIZE,
                           cfg.CLASSIFIER_INPUT_SIZE, 3)

    device = cfg.DEVICE

    if device == "x86":
        from libs.detectors.x86.detector import Detector
        from libs.classifiers.x86.classifier import Classifier

    elif device == "EdgeTPU":
        from libs.detectors.edgetpu.detector import Detector
        from libs.classifiers.edgetpu.classifier import Classifier
    elif device == "Jetson":
        from libs.detectors.jetson.detector import Detector
        from libs.classifiers.jetson.classifier import Classifier
    else:
        raise ValueError('Unsupported device: {}'.format(device))

    detector = Detector(cfg)
    classifier_model = Classifier(cfg)

    print("INFO: Start inferencing")
    for filename in os.listdir(input_dir):
        image_path = os.path.join(input_dir, filename)
        raw_img = cv.imread(image_path)
        if np.shape(raw_img) != ():
            height, width, _ = raw_img.shape
            resized_image = cv.resize(raw_img, tuple(detector_input_size[:2]))
            rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB)
            objects_list = detector.inference(rgb_resized_image)
            faces = []
            cordinates = []
            cordinates_head = []
            for obj in objects_list:
                if 'bbox' in obj.keys():
                    face_bbox = obj['bbox']  # [ymin, xmin, ymax, xmax]
                    xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]],
                                             width)
                    ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]],
                                             height)
                    croped_face = raw_img[int(ymin):int(ymax),
                                          int(xmin):int(xmax)]
                    # Resizing input image
                    croped_face = cv.resize(croped_face,
                                            tuple(classifier_img_size[:2]))
                    croped_face = cv.cvtColor(croped_face, cv.COLOR_BGR2RGB)
                    # Normalizing input image to [0.0-1.0]
                    croped_face = croped_face / 255.0
                    faces.append(croped_face)
                    cordinates.append(
                        [int(xmin), int(ymin),
                         int(xmax), int(ymax)])
                if 'bbox_head' in obj.keys():
                    head_bbox = obj['bbox_head']  # [ymin, xmin, ymax, xmax]
                    xmin, xmax = np.multiply([head_bbox[1], head_bbox[3]],
                                             width)
                    ymin, ymax = np.multiply([head_bbox[0], head_bbox[2]],
                                             height)
                    cordinates_head.append(
                        [int(xmin), int(ymin),
                         int(xmax), int(ymax)])

            faces = np.array(faces)
            if faces.shape[0] == 0:
                print('Cannot find a face in "{}"'.format(image_path))
                continue

            face_mask_results, scores = classifier_model.inference(faces)
            for i, cor in enumerate(cordinates):
                if face_mask_results[i] == 1:
                    color = (0, 0, 255)
                elif face_mask_results[i] == 0:
                    color = (0, 255, 0)
                else:
                    color = (0, 0, 0)

                cv.rectangle(raw_img, (cor[0], cor[1]), (cor[2], cor[3]),
                             color, 2)
            for cor in cordinates_head:
                cv.rectangle(raw_img, (cor[0], cor[1]), (cor[2], cor[3]),
                             (200, 200, 200), 2)
            cv.imwrite(os.path.join(output_dir, filename), raw_img)
            print('Image "{}" processed at {} fps'.format(
                image_path, detector.fps))
        else:
            continue

    print('INFO: Finished :) Output images exported at: ', output_dir)
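
For orientation, the objects_list consumed above is a list of dicts in which each entry may carry a 'bbox' (face) key and a 'bbox_head' key, both in normalized [ymin, xmin, ymax, xmax] order. A hypothetical entry with invented values (the 'id' format is taken from a debug sample in Example No. 4):

objects_list = [
    {
        'id': '1-0',
        'bbox': [0.12, 0.40, 0.34, 0.52],       # face box: [ymin, xmin, ymax, xmax]
        'bbox_head': [0.08, 0.38, 0.36, 0.55],  # head box, same order
    },
]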
Example No. 4
# Imports assumed by this class (cv is OpenCV).
import cv2 as cv
import numpy as np


class FaceMaskAppEngine:
    """
    Runs a detector that finds faces in the input video and a classifier that
    assigns each cropped face to the face or mask class.
    :param config: a Config instance that provides the necessary parameters.
    """
    def __init__(self, config):
        self.config = config
        self.detector = None
        self.classifier_model = None
        self.running_video = False
        self.device = self.config.DEVICE
        if self.device == "x86":
            from libs.detectors.x86.detector import Detector
            from libs.classifiers.x86.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        elif self.device == "EdgeTPU":
            from libs.detectors.edgetpu.detector import Detector
            from libs.classifiers.edgetpu.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        elif self.device == "Jetson":
            from libs.detectors.jetson.detector import Detector
            from libs.classifiers.jetson.classifier import Classifier
            self.detector = Detector(self.config)
            self.classifier_model = Classifier(self.config)
        else:
            raise ValueError('Unsupported device: {}'.format(self.device))

        self.image_size = (self.config.DETECTOR_INPUT_SIZE[0],
                           self.config.DETECTOR_INPUT_SIZE[1], 3)
        self.classifier_img_size = (self.config.CLASSIFIER_INPUT_SIZE,
                                    self.config.CLASSIFIER_INPUT_SIZE, 3)

    def set_ui(self, ui):
        self.ui = ui

    def __process(self, cv_image):
        # Resize input image to resolution
        self.resolution = self.config.APP_VIDEO_RESOLUTION
        cv_image = cv.resize(cv_image, tuple(self.resolution))

        resized_image = cv.resize(cv_image, tuple(self.image_size[:2]))
        rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB)
        objects_list = self.detector.inference(rgb_resized_image)
        #objects_list = [{'id': '1-0', 'bbox': [.1, .2, .5, .5]}, {'id': '1-1', 'bbox': [.3, .1, .5, .5]}]
        faces = []
        for obj in objects_list:
            if 'bbox' in obj.keys():
                face_bbox = obj['bbox']  # [ymin, xmin, ymax, xmax]
                xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]],
                                         self.resolution[0])
                ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]],
                                         self.resolution[1])
                croped_face = cv_image[int(ymin):int(ymax),
                                       int(xmin):int(xmax)]
                # Resizing input image
                croped_face = cv.resize(croped_face,
                                        tuple(self.classifier_img_size[:2]))
                croped_face = cv.cvtColor(croped_face, cv.COLOR_BGR2RGB)
                # Normalizing input image to [0.0-1.0]
                croped_face = croped_face / 255.0
                faces.append(croped_face)

        faces = np.array(faces)
        if faces.shape[0] > 0:
            face_mask_results, scores = self.classifier_model.inference(faces)
        else:
            # No face crops: skip the classifier call on an empty batch.
            face_mask_results, scores = [], []

        # TODO: this could be optimized via the dictionary returned from openpifpaf (returning a List instead of a dict)

        idx = 0
        for obj in objects_list:
            if 'bbox' in obj.keys():
                obj['face_label'] = face_mask_results[idx]
                obj['score'] = scores[idx]
                idx = idx + 1
                box = obj["bbox"]
                x0 = box[1]
                y0 = box[0]
                x1 = box[3]
                y1 = box[2]
                obj["bbox"] = [x0, y0, x1, y1]

        return cv_image, objects_list

    def process_video(self, video_uri):
        input_cap = cv.VideoCapture(video_uri)

        if input_cap.isOpened():
            print('opened video ', video_uri)
        else:
            print('failed to load video ', video_uri)
            return

        self.running_video = True
        while input_cap.isOpened() and self.running_video:
            ret, cv_image = input_cap.read()
            if not ret:
                # End of stream: break instead of spinning forever, since
                # isOpened() stays True after the last frame of a file.
                break
            cv_image, objects = self.__process(cv_image)
            self.ui.update(cv_image, objects)
        input_cap.release()
        self.running_video = False
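
A minimal usage sketch, assuming a Config loadable from a JSON file as in the scripts above and a UI object exposing the update(frame, objects_list) method that process_video calls. The PrintUI class and the import path for Config are hypothetical:

from libs.config_engine import Config  # import path is a guess


class PrintUI:
    # Hypothetical stand-in: process_video only needs an update() method.
    def update(self, frame, objects_list):
        print('frame {}x{}: {} objects'.format(
            frame.shape[1], frame.shape[0], len(objects_list)))


if __name__ == '__main__':
    cfg = Config(path='configs/config.json')  # config path taken from Example No. 2's docstring
    engine = FaceMaskAppEngine(cfg)
    engine.set_ui(PrintUI())
    engine.process_video('data/video/sample.mov')  # sample path from Example No. 2's docstring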