Example #1
def emotion_init(**params):
    # Load driver
    emotion_model = params.get('emotion_model')
    if not emotion_model:
        return

    LOG.info('------------------------')
    LOG.info('Loading emotion model at %s...' % emotion_model)
    drv = driver.load_driver('openvino')
    # Instantiate driver
    global emotion_serving
    emotion_serving = drv()
    emotion_serving.load_model(
        emotion_model,
        device='CPU',
        flexible_batch_size=True,
    )

    LOG.info('Loaded.')
    LOG.info('------------------------')
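
Every snippet on this page follows the same lifecycle: driver.load_driver(name) returns a driver class, calling it creates a serving instance, load_model(...) loads the weights, and predict(feed_dict) runs inference. A minimal end-to-end sketch of that lifecycle (the model path and input shape below are placeholders, not taken from any example here):

import numpy as np

from ml_serving.drivers import driver

# Load the driver class and instantiate a serving object
drv = driver.load_driver('openvino')
serving = drv()

# Hypothetical model path; device and flexible_batch_size as in Example #1
serving.load_model('/models/face-detection.xml', device='CPU',
                   flexible_batch_size=True)

# Feed a dummy NCHW batch keyed by the model's first input name
input_name = list(serving.inputs.keys())[0]
dummy = np.zeros((1, 3, 300, 300), np.float32)
outputs = serving.predict({input_name: dummy})
print({name: out.shape for name, out in outputs.items()})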
Example #2
    def __init__(self, model_path, use_tensor_rt=False):
        self._model_path = model_path
        drv = driver.load_driver('tensorflow')
        self.serving = drv()
        _model = 'opencv_face_detector_uint8.pb'
        if use_tensor_rt:
            _model = 'opencv_face_detector_uint8_rt_fp16.pb'

        self.serving.load_model(os.path.join(self._model_path, _model),
                                inputs='data:0',
                                outputs='mbox_loc:0,mbox_conf_flatten:0')
        configFile = self._model_path + "/detector.pbtxt"
        self.net = cv2.dnn.readNetFromTensorflow(None, configFile)

        self.prior = np.fromfile(self._model_path + '/mbox_priorbox.np',
                                 np.float32)
        self.prior = np.reshape(self.prior, (1, 2, 35568))
        self.threshold = 0.5
        # Dry run
        self.bboxes(np.zeros((300, 300, 3), np.uint8))
Example #3
    def _load_driver(self):
        if self.serving is None:
            driver_name = 'openvino'
            if '_edgetpu' in self.face_detection_path and '.tflite' in self.face_detection_path:
                driver_name = 'edgetpu'
            drv = driver.load_driver(driver_name)
            # Instantiate driver
            self.serving = drv()
            self.serving.load_model(
                self.face_detection_path,
                # device=self.device,
                flexible_batch_size=True,
            )
            self.input_name = list(self.serving.inputs.keys())[0]
            if driver_name == 'openvino':
                # OpenVINO inputs are NCHW: reversing the last two dims gives (W, H)
                self.input_size = tuple(
                    list(self.serving.inputs.values())[0][:-3:-1])
            else:
                # TFLite/EdgeTPU inputs are NHWC: take dims H, W in reverse for (W, H)
                self.input_size = tuple(
                    list(self.serving.inputs.values())[0][-2:-4:-1])
            self.output_name = list(self.serving.outputs.keys())[0]
Example #4
def main():
    args = parse_args()
    face_driver = driver.load_driver('openvino')()
    face_driver.load_model(args.face_model)

    train_a = sorted(glob.glob(os.path.join(args.data_dir, '*-1.tiff')))
    train_b = sorted(glob.glob(os.path.join(args.data_dir, '*-2.tiff')))

    output_a = os.path.join(args.output_dir, 'trainA')
    output_b = os.path.join(args.output_dir, 'trainB')
    os.makedirs(output_a, exist_ok=True)
    os.makedirs(output_b, exist_ok=True)

    print('Processing images...')
    for img_a_path, img_b_path in zip(train_a, train_b):
        img_a = cv2.imread(img_a_path)
        img_b = cv2.imread(img_b_path)
        base_a, _ = os.path.splitext(os.path.basename(img_a_path))
        base_b, _ = os.path.splitext(os.path.basename(img_b_path))

        boxes_a = hook.get_boxes(face_driver, img_a, threshold=0.2)
        boxes_b = hook.get_boxes(face_driver, img_b, threshold=0.2)

        if len(boxes_a) != 1 or len(boxes_b) != 1:
            print(f'Found {len(boxes_a)} boxes: {img_a_path}')
            print(f'Found {len(boxes_b)} boxes: {img_b_path}')
            continue

        img_a = hook.crop_by_box(img_a, boxes_a[0], margin=0.05)
        img_b = hook.crop_by_box(img_b, boxes_b[0], margin=0.05)

        cv2.imwrite(os.path.join(output_a, base_a + '.jpg'), img_a)
        cv2.imwrite(os.path.join(output_b, base_b + '.jpg'), img_b)
        print('.', end='')
        sys.stdout.flush()

    print()
    print(f'Done. Processed images are saved in {output_a} and {output_b}')
Example #5
def face_init(**params):
    threshold = params.get('face_threshold')
    if threshold:
        PARAMS['face_threshold'] = float(threshold)
    # Load driver
    face_model = params.get('face_model')
    if not face_model:
        return

    LOG.info('------------------------')
    LOG.info('Loading face model at %s...' % face_model)
    drv = driver.load_driver('openvino')
    # Instantiate driver
    global face_serving
    face_serving = drv()
    face_serving.load_model(
        face_model,
        device='CPU',
        flexible_batch_size=True,
    )

    LOG.info('Loaded.')
    LOG.info('------------------------')
Example #6
from ml_serving.drivers import driver
import cv2
import os
import numpy as np

drv = driver.load_driver("model")()

drv.load_model('kuberlab-demo/person-mask:1.82.85')

video = cv2.VideoCapture(0)

while True:
    _, frame = video.read()
    if frame is None:
        break
    serv_img = cv2.resize(frame[:, :, ::-1], (160, 160))
    serv_img = serv_img.astype(np.float32) / 255
    result = drv.predict({'image': np.expand_dims(serv_img, axis=0)})
    mask = result['output']
    mask = mask[0] * 255
    #mask[mask < 10] = 0
    mask = mask.astype(np.uint8)
    mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))
    mask = mask.astype(np.float32) / 255
    frame = frame.astype(np.float32) * np.expand_dims(mask, axis=2)
    frame = frame.astype(np.uint8)
    cv2.imshow('Video', frame)
    key = cv2.waitKey(1)
    if key in [ord('q'), 202, 27]:
        break
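
Example #6 multiplies the frame by the mask, which fades the background to black. A variation of the same loop body (a sketch mirroring the blend used in Examples #18 and #19 below) composites the person over a custom background instead:

# Assumed drop-in for the end of the loop above: 'frame' is the BGR camera
# frame, 'mask' the resized float mask in [0, 1] from the person-mask model.
back = np.full(frame.shape, 55, np.float32)   # flat gray background
mask3 = np.expand_dims(mask, axis=2)          # HxWx1 for broadcasting
blended = frame.astype(np.float32) * mask3 + back * (1 - mask3)
frame = blended.astype(np.uint8)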
Example #7
def main(args):
    # Get the paths for the corresponding images
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            utils.print_fun('Not using mlboard.')
        else:
            utils.print_fun('Using mlboard parameters logging.')
            use_mlboard = True

    image_size = args.image_size
    driver_name = 'openvino'
    if os.path.isdir(args.model) and os.path.exists(
            os.path.join(args.model, 'saved_model.pb')):
        driver_name = 'tensorflow'
        image_size = 112

    data = {
        'image_size': image_size,
        'driver_name': driver_name,
        'model_path': args.model,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    img_paths, actual_issame = load_dataset(args.data_dir)
    drv = driver.load_driver(driver_name)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device='CPU',
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    utils.print_fun('Running forward pass on dataset images')

    # Enqueue one epoch of image paths and labels
    nrof_images = len(img_paths)

    data = {
        'num_images': nrof_images,
        'num_classes': nrof_images // 4,
    }
    update_data(data, use_mlboard, mlboard)

    embedding_size = list(serving.outputs.values())[0][-1]
    nrof_batches = int(np.ceil(float(nrof_images) / args.batch_size))
    emb_array = np.zeros((nrof_images, embedding_size))

    # TODO(nmakhotkin): cache embeddings by image paths (because image pairs
    #  are duplicated and no need to do inference on them)
    for i in range(nrof_batches):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = img_paths[start_index:end_index]
        probe_imgs = dataset.load_data(paths_batch,
                                       image_size,
                                       normalization=args.normalization)
        emb = _predict(serving, probe_imgs)
        emb_array[start_index:end_index, :] = emb
        if i % 5 == 4:
            utils.print_fun('{}/{}'.format(i + 1, nrof_batches))
            sys.stdout.flush()
    utils.print_fun('')
    embeddings = emb_array

    tpr, fpr, accuracy, val, val_std, far = helpers.evaluate(
        embeddings,
        actual_issame,
        nrof_folds=args.lfw_nrof_folds,
        distance_metric=args.distance_metric,
        subtract_mean=args.subtract_mean)

    rpt = report(tpr, fpr, accuracy, val, val_std, far)
    with open('report.html', 'w') as f:
        f.write(rpt)
    update_data({'#documents.report.html': rpt}, use_mlboard, mlboard)
Example #8
def eval_video(**kwargs):

    logger.setLevel(logging.INFO)

    cap = cv2.VideoCapture(kwargs['video_source'])
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(
        *'MP4V')  # int(cap.get(cv2.CAP_PROP_FOURCC))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frame_count = -1
    iter_count = 0
    each_frame = kwargs['each_frame']
    save_dir = kwargs['save_dir']
    frames_limit = kwargs['frames_limit']

    video_writer = None
    video_output = kwargs['video_output']
    if video_output is not None:
        logger.info(
            f'Write video to {video_output} ({width}x{height}, {fps/each_frame} fps) ...'
        )
        video_writer = cv2.VideoWriter(video_output,
                                       fourcc,
                                       fps / each_frame,
                                       frameSize=(width, height))

    write_report_to = None
    data = {}
    if kwargs['report_output']:
        write_report_to = kwargs['report_output']

    tracker = OnlineTracker(**kwargs)
    timer = Timer()
    results = []
    wait_time = 1

    drv = driver.load_driver('tensorflow')

    logger.info('init person detection driver...')
    person_detect_driver = drv()
    person_detect_model = kwargs['person_detect_model']
    logger.info(f'loading person detection model {person_detect_model}...')
    person_detect_driver.load_model(person_detect_model)
    logger.info(f'person detection model {person_detect_model} loaded')

    try:
        while True:

            frame_count += 1
            if frames_limit is not None and frame_count > frames_limit:
                logger.warning('frames limit {} reached'.format(frames_limit))
                break

            # read every X-th BGR frame
            _, frame = cap.read()
            if frame_count % each_frame > 0:
                continue

            if frame is None:
                logger.warning('video capturing finished')
                break

            if iter_count % 20 == 0:
                logger.info(
                    'Processing frame {} (iteration {}) ({:.2f} fps)'.format(
                        frame_count, iter_count,
                        1. / max(1e-5, timer.average_time)))

            det_tlwhs, det_scores = detect_persons_tf(person_detect_driver,
                                                      frame,
                                                      threshold=.5)

            # run tracking
            timer.tic()
            online_targets = tracker.update(frame, det_tlwhs, None)
            online_tlwhs = []
            online_ids = []
            for t in online_targets:
                online_tlwhs.append(t.tlwh)
                online_ids.append(t.track_id)
            timer.toc()

            if write_report_to:

                for i, id in enumerate(online_ids):
                    if id not in data:
                        data[id] = {
                            'intervals': [],
                            'images': [],
                            'last_image': None,
                        }
                    di = data[id]['intervals']
                    if len(di) == 0 or di[-1][1] < frame_count - each_frame:
                        if len(di) > 0 and di[-1][0] == di[-1][1]:
                            # drop the zero-length interval in place so the
                            # change is visible in data[id]['intervals']
                            del di[-1]
                        di.append([frame_count, frame_count])
                    else:
                        di[-1][1] = frame_count
                    if not data[id]['last_image'] or data[id][
                            'last_image'] < frame_count - fps * 10:
                        data[id]['last_image'] = frame_count
                        tlwh = [max(0, int(o)) for o in online_tlwhs[i]]
                        pers_img = frame[tlwh[1]:tlwh[1] + tlwh[3],
                                         tlwh[0]:tlwh[0] + tlwh[2]].copy()
                        if max(pers_img.shape[0], pers_img.shape[1]) > 100:
                            coef = max(pers_img.shape[0],
                                       pers_img.shape[1]) / 100
                            pers_img = cv2.resize(
                                pers_img, (int(pers_img.shape[1] / coef),
                                           int(pers_img.shape[0] / coef)))
                        _, pers_img = cv2.imencode('.jpeg', pers_img)
                        data[id]['images'].append(
                            base64.b64encode(pers_img).decode())

            # save results
            frame_id = frame_count  # or make it incremental?
            results.append((frame_id + 1, online_tlwhs, online_ids))

            online_im = vis.plot_tracking(frame,
                                          online_tlwhs,
                                          online_ids,
                                          frame_id=frame_id,
                                          fps=1. / timer.average_time)

            for tlwh in det_tlwhs:
                cv2.rectangle(
                    online_im,
                    (tlwh[0], tlwh[1]),  # (left, top)
                    (tlwh[0] + tlwh[2], tlwh[1] + tlwh[3]),  # (right, bottom)
                    (0, 255, 0),
                    1,
                )

            if kwargs['show_image']:
                cv2.imshow('online_im', online_im)
            if save_dir is not None:
                save_to = os.path.join(save_dir, '{:05d}.jpg'.format(frame_id))
                cv2.imwrite(save_to, online_im)

            if video_writer is not None:
                video_writer.write(cv2.resize(online_im, (width, height)))

            key = cv2.waitKey(wait_time)
            # compare the raw code first: 'q', Esc, or 'q' in the Russian layout
            if key in (ord('q'), 202, 27):
                exit(0)
            key = chr(key % 128).lower()
            if key == 'p':
                cv2.waitKey(0)
            elif key == 'a':
                wait_time = int(not wait_time)

            iter_count += 1

    except (KeyboardInterrupt, SystemExit) as e:
        logger.info('Caught %s: %s' % (e.__class__.__name__, e))
    finally:
        cv2.destroyAllWindows()
        if video_writer is not None:
            logger.info('Written video to %s.' % video_output)
            video_writer.release()

        if write_report_to:

            for i in data:
                di = data[i]
                di['index'] = i
                di['duration'] = sum([i[1] - i[0] for i in di['intervals']])
                di['duration_sec'] = '{:.2f}'.format(di['duration'] / fps)
                di['intervals_str'] = ', '.join([
                    '{:.2f}-{:.2f}'.format(i[0] / fps, i[1] / fps)
                    for i in di['intervals']
                ])

            data = data.values()
            data = sorted(data, key=lambda x: x['duration'], reverse=True)

            # prepare html
            tpl = jinja2.Template(template)

            html = tpl.render(data=data)
            with open(write_report_to, 'w') as f:
                f.write(html)

            update_data({'#documents.persons.html': html}, use_mlboard,
                        mlboard)
Example #9
def main(args):
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print('Not using mlboard.')
        else:
            print('Using mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp,
                                            args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(
            cls.image_paths
        ) > 0, 'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images /
                                           args.batch_size))
    emb_array = np.zeros((nrof_images, 512))
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        for j in range(end_index - start_index):
            print('Batch {} <-> {}'.format(paths_batch[j],
                                           labels[start_index + j]))
        images = facenet.load_data(paths_batch, False, False, args.image_size)

        if serving.driver_name == 'tensorflow':
            feed_dict = {'input:0': images, 'phase_train:0': False}
        elif serving.driver_name == 'openvino':
            input_name = list(serving.inputs.keys())[0]

            # Transpose image for channel first format
            images = images.transpose([0, 3, 1, 2])
            feed_dict = {input_name: images}
        else:
            raise RuntimeError('Driver %s currently not supported' %
                               serving.driver_name)

        outputs = serving.predict(feed_dict)
        emb_array[start_index:end_index, :] = list(outputs.values())[0]

    classifier_filename_exp = os.path.expanduser(args.classifier_filename)

    if args.mode == 'TRAIN':
        # Train classifier
        print('Training classifier')
        model = svm.SVC(kernel='linear', probability=True)
        model.fit(emb_array, labels)

        # Create a list of class names
        class_names = [cls.name.replace('_', ' ') for cls in dataset]
        print('Classes:')
        print(class_names)

        # Saving classifier model
        with open(classifier_filename_exp, 'wb') as outfile:
            pickle.dump((model, class_names), outfile, protocol=2)
        print('Saved classifier model to file "%s"' % classifier_filename_exp)

    elif args.mode == 'CLASSIFY':
        # Classify images
        print('Testing classifier')
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)

        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]

        for i in range(len(best_class_indices)):
            print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]],
                                     best_class_probabilities[i]))

        accuracy = np.mean(np.equal(best_class_indices, labels))
        update_data({'accuracy': accuracy}, use_mlboard, mlboard)
        print('Accuracy: %.3f' % accuracy)

        if args.upload_model and accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')
            model_name = 'facenet-classifier'
            version = '1.0.0-%s-%s' % (args.driver, timestamp)

            print('Uploading model as %s:%s' % (model_name, version))
            upload_model(use_mlboard, mlboard, classifier_filename_exp,
                         model_name, version)
Example #10
def main(args):

    output_dir = os.path.expanduser(args.output_dir)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes.txt')
    align_filename = os.path.join(output_dir, 'align.pkl')

    align_data_args = dict(vars(args))
    # the next arguments can be changed w/o changing aligned images
    del align_data_args['complementary']
    del align_data_args['input_dir']
    del align_data_args['output_dir']

    align_data = {}
    clear_output_dir = True
    if args.complementary:
        if os.path.isfile(align_filename):
            print_fun("Check previous align data")
            with open(align_filename, 'rb') as infile:
                (align_data_args_loaded,
                 align_data_loaded) = pickle.load(infile)
                if align_data_args == align_data_args_loaded:
                    print_fun("Loaded data about %d aligned classes" %
                              len(align_data_loaded))
                    align_data = align_data_loaded
                    clear_output_dir = False
                else:
                    print_fun(
                        "Previous align data was built with different "
                        "arguments, skipping"
                    )

    if clear_output_dir:
        print_fun("Clearing output dir")
        shutil.rmtree(output_dir, ignore_errors=True)

    if not os.path.isdir(output_dir):
        print_fun("Creating output dir")
        os.makedirs(output_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    # facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print_fun('Creating networks and loading parameters')

    # Load driver
    drv = driver.load_driver("openvino")
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.face_detection_path,
        device="CPU",
        flexible_batch_size=True,
    )

    bg_rm_drv = bg_remove.get_driver(args.bg_remove_path)

    input_name = list(serving.inputs.keys())[0]
    output_name = list(serving.outputs.keys())[0]

    threshold = 0.5

    min_face_area = args.min_face_size**2

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            output_class_dir_created = False
            if cls.name in align_data:
                align_data_class = align_data[cls.name]
            else:
                align_data_class = {}
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                if not os.path.exists(output_filename):
                    try:
                        img = cv2.imread(image_path,
                                         cv2.IMREAD_COLOR).astype(np.float32)
                    except Exception as e:
                        error_message = '{}: {}'.format(image_path, e)
                        print_fun('ERROR: %s' % error_message)
                        continue

                    img_hash = hashlib.sha1(img.tobytes()).hexdigest()
                    if image_path in align_data_class and align_data_class[
                            image_path] == img_hash:
                        print_fun("%s - cached" % image_path)
                        continue
                    align_data_class[image_path] = img_hash
                    print_fun(image_path)

                    if len(img.shape) <= 2:
                        print_fun('WARNING: Unable to align "%s", shape %s' %
                                  (image_path, img.shape))
                        text_file.write('%s\n' % output_filename)
                        continue

                    if bg_rm_drv is not None:
                        img = bg_rm_drv.apply_mask(img)

                    serving_img = cv2.resize(img, (300, 300),
                                             interpolation=cv2.INTER_AREA)
                    serving_img = np.transpose(serving_img, [2, 0, 1]).reshape(
                        [1, 3, 300, 300])
                    raw = serving.predict({input_name: serving_img
                                           })[output_name].reshape([-1, 7])
                    # 7 values:
                    # class_id, label, confidence, x_min, y_min, x_max, y_max
                    # Select boxes where confidence > factor
                    bboxes_raw = raw[raw[:, 2] > threshold]
                    bboxes_raw[:, 3] = bboxes_raw[:, 3] * img.shape[1]
                    bboxes_raw[:, 5] = bboxes_raw[:, 5] * img.shape[1]
                    bboxes_raw[:, 4] = bboxes_raw[:, 4] * img.shape[0]
                    bboxes_raw[:, 6] = bboxes_raw[:, 6] * img.shape[0]

                    bounding_boxes = np.zeros([len(bboxes_raw), 5])

                    bounding_boxes[:, 0:4] = bboxes_raw[:, 3:7]
                    bounding_boxes[:, 4] = bboxes_raw[:, 2]

                    # Get the biggest box: find the box with the largest area,
                    # (y1 - y0) * (x1 - x0).
                    bbs = bounding_boxes
                    area = (bbs[:, 3] - bbs[:, 1]) * (bbs[:, 2] - bbs[:, 0])

                    if len(area) < 1:
                        print_fun('WARNING: Unable to align "%s", n_faces=%s' %
                                  (image_path, len(area)))
                        text_file.write('%s\n' % output_filename)
                        continue

                    num = np.argmax(area)
                    if area[num] < min_face_area:
                        print_fun(
                            'WARNING: Face found but too small - about {}px '
                            'wide against the required minimum of {}px. Try '
                            'adjusting the --min-face-size parameter'.format(
                                int(np.sqrt(area[num])), args.min_face_size))
                        continue

                    bounding_boxes = np.stack([bbs[num]])

                    imgs = openvino_detection.get_images(
                        img,
                        bounding_boxes,
                        face_crop_size=args.image_size,
                        face_crop_margin=args.margin,
                        prewhiten=False,
                    )
                    for i, cropped in enumerate(imgs):
                        nrof_successfully_aligned += 1
                        bb = bounding_boxes[i]
                        filename_base, file_extension = os.path.splitext(
                            output_filename)
                        output_filename_n = "{}_{}{}".format(
                            filename_base, i, file_extension)

                        text_file.write(
                            '%s %d %d %d %d\n' %
                            (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        if not output_class_dir_created:
                            output_class_dir_created = True
                            if not os.path.exists(output_class_dir):
                                os.makedirs(output_class_dir)
                        cv2.imwrite(output_filename_n, cropped)

            align_data[cls.name] = align_data_class

    with open(align_filename, 'wb') as align_file:
        pickle.dump((align_data_args, align_data), align_file, protocol=2)

    print_fun('Total number of images: %d' % nrof_images_total)
    print_fun('Number of successfully aligned images: %d' %
              nrof_successfully_aligned)
    build_id = os.environ.get('BUILD_ID', None)
    if os.environ.get('PROJECT_ID', None) and (build_id is not None):
        from mlboardclient.api import client
        client.update_task_info({'aligned_location': output_dir})
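
The detection decode in the middle of Example #10 (reshape the raw output to [-1, 7], keep rows above the confidence threshold, scale the relative coordinates to pixels) recurs across several snippets on this page. Pulled out as a standalone helper it looks roughly like this (a sketch, not code from the original source):

def decode_openvino_detections(raw, frame_shape, threshold=0.5):
    # Each row: image_id, label, confidence, x_min, y_min, x_max, y_max,
    # with box coordinates relative to the input image size.
    raw = raw.reshape([-1, 7])
    boxes = raw[raw[:, 2] > threshold].copy()
    h, w = frame_shape[:2]
    boxes[:, 3] *= w  # x_min
    boxes[:, 5] *= w  # x_max
    boxes[:, 4] *= h  # y_min
    boxes[:, 6] *= h  # y_max
    return boxes[:, 3:7], boxes[:, 2]  # pixel boxes and their confidences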
Example #11
def main(args):
    algorithms = ["kNN", "SVM"]

    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print_fun('Not using mlboard.')
        else:
            print_fun('Using mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        if len(cls.image_paths) == 0:
            print_fun('WARNING: %s: There are no aligned images in this class.' % cls)

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print_fun('Number of classes: %d' % len(dataset))
    print_fun('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print_fun('Loading feature extraction model')

    # Load and instantiate driver
    drv = driver.load_driver(args.driver)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print_fun('Calculating features for images')

    noise_count = max(0, args.noise_count) if args.noise else 0
    emb_args = {
        'model': args.model,
        'use_split_dataset': args.use_split_dataset,
        'noise': noise_count > 0,
        'noise_count': noise_count,
        'flip': args.flip,
        'image_size': args.image_size,
        'min_nrof_images_per_class': args.min_nrof_images_per_class,
        'nrof_train_images_per_class': args.nrof_train_images_per_class,
    }

    stored_embeddings = {}
    if args.mode == 'TRAIN':
        embeddings_filename = os.path.join(
            args.data_dir,
            "embeddings-%s.pkl" % hashlib.md5(json.dumps(emb_args, sort_keys=True).encode()).hexdigest(),
        )
        if os.path.isfile(embeddings_filename):
            print_fun("Found stored embeddings data, loading...")
            with open(embeddings_filename, 'rb') as embeddings_file:
                stored_embeddings = pickle.load(embeddings_file)

    total_time = 0.

    nrof_images = len(paths)

    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / args.batch_size))
    epp = embeddings_per_path(noise_count, args.flip)
    embeddings_size = nrof_images * epp

    emb_array = np.zeros((embeddings_size, 512))
    fit_labels = []

    emb_index = 0
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        labels_batch = labels[start_index:end_index]

        # has_not_stored_embeddings = False
        paths_batch_load, labels_batch_load = [], []

        for j in range(end_index - start_index):
            # print_fun(os.path.split(paths_batch[j]))
            cls_name = dataset[labels_batch[j]].name
            cached = True
            if cls_name not in stored_embeddings or paths_batch[j] not in stored_embeddings[cls_name]:
                # has_not_stored_embeddings = True
                cached = False
                paths_batch_load.append(paths_batch[j])
                labels_batch_load.append(labels_batch[j])
            else:
                embeddings = stored_embeddings[cls_name][paths_batch[j]]
                emb_array[emb_index:emb_index + len(embeddings), :] = embeddings
                fit_labels.extend([labels_batch[j]] * len(embeddings))
                emb_index += len(embeddings)

            print_fun('Batch {} <-> {} {} {}'.format(
                paths_batch[j], labels_batch[j], cls_name, "cached" if cached else "",
            ))

        if len(paths_batch_load) == 0:
            continue

        images = load_data(paths_batch_load, labels_batch_load, args.image_size, noise_count, args.flip)

        if serving.driver_name == 'tensorflow':
            feed_dict = {'input:0': images, 'phase_train:0': False}
        elif serving.driver_name == 'openvino':
            input_name = list(serving.inputs.keys())[0]
            # Transpose image for channel first format
            images = images.transpose([0, 3, 1, 2])
            feed_dict = {input_name: images}
        else:
            raise RuntimeError('Driver %s currently not supported' % serving.driver_name)

        t = time.time()
        outputs = serving.predict(feed_dict)
        total_time += time.time() - t

        emb_outputs = list(outputs.values())[0]

        if args.mode == "TRAIN":
            for n, e in enumerate(emb_outputs):
                cls_name = dataset[labels_batch_load[n]].name
                if cls_name not in stored_embeddings:
                    stored_embeddings[cls_name] = {}
                path = paths_batch_load[n]
                if path not in stored_embeddings[cls_name]:
                    stored_embeddings[cls_name][path] = []
                stored_embeddings[cls_name][path].append(e)

        emb_array[emb_index:emb_index + len(images), :] = emb_outputs
        fit_labels.extend(labels_batch_load)

        emb_index += len(images)

    # average_time = total_time / embeddings_size * 1000
    # print_fun('Average time: %.3fms' % average_time)

    classifiers_path = os.path.expanduser(args.classifiers_path)

    if args.mode == 'TRAIN':

        # Save embeddings
        with open(embeddings_filename, 'wb') as embeddings_file:
            pickle.dump(stored_embeddings, embeddings_file, protocol=2)

        # Clear (or create) classifiers directory
        try:
            shutil.rmtree(classifiers_path, ignore_errors=True)
        except Exception:
            pass
        os.makedirs(classifiers_path)

        # Create a list of class names
        dataset_class_names = [cls.name for cls in dataset]
        class_names = [cls.replace('_', ' ') for cls in dataset_class_names]
        print_fun('Classes:')
        print_fun(class_names)

        class_stats = [{} for _ in range(len(dataset_class_names))]
        for cls in stored_embeddings:
            class_stats[dataset_class_names.index(cls)] = {
                'images': len(stored_embeddings[cls]),
                'embeddings': sum(len(e) for e in stored_embeddings[cls].values()),
            }

        # Train classifiers
        for algorithm in algorithms:
            if args.only_algorithm is not None and algorithm != args.only_algorithm:
                continue

            print_fun('Classifier algorithm %s' % algorithm)
            # update_data({'classifier_algorithm': args.algorithm}, use_mlboard, mlboard)
            if algorithm == 'SVM':
                model = svm.SVC(kernel='linear', probability=True)
            elif algorithm == 'kNN':
                # n_neighbors = int(round(np.sqrt(len(emb_array))))
                model = neighbors.KNeighborsClassifier(n_neighbors=args.knn_neighbors, weights='distance')
            else:
                raise RuntimeError("Classifier algorithm %s not supported" % algorithm)

            model.fit(emb_array, fit_labels)

            # Saving classifier model
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'wb') as outfile:
                pickle.dump((model, class_names, class_stats), outfile, protocol=2)
            print_fun('Saved classifier model to file "%s"' % classifier_filename)
            # update_data({'average_time_%s': '%.3fms' % average_time}, use_mlboard, mlboard)

    elif args.mode == 'CLASSIFY':

        summary_accuracy = 1

        # Classify images
        for algorithm in algorithms:
            print_fun('Testing classifier %s' % algorithm)
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'rb') as infile:
                (model, class_names, class_stats) = pickle.load(infile)

            print_fun('Loaded classifier model from file "%s"' % classifier_filename)

            predictions = model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            if isinstance(model, neighbors.KNeighborsClassifier):
                param_name = 'distance'
                # clf_name = "knn"
                (closest_distances, _) = model.kneighbors(emb_array)
                eval_values = closest_distances[:, 0]
            elif isinstance(model, svm.SVC):
                param_name = 'probability'
                # clf_name = "svm"
                eval_values = predictions[np.arange(len(best_class_indices)), best_class_indices]
            else:
                raise RuntimeError("Unsupported classifier type: %s" % type(model))

            for i in range(len(best_class_indices)):
                predicted = best_class_indices[i]
                if predicted == labels[i]:
                    print_fun('%4d  %s: %s %.3f' % (
                        i, class_names[predicted], param_name, eval_values[i],
                    ))
                else:
                    print_fun('%4d  %s: %s %.3f, WRONG! Should be %s.' % (
                        i, class_names[predicted], param_name, eval_values[i], class_names[labels[i]]),
                              )

            accuracy = np.mean(np.equal(best_class_indices, labels))
            summary_accuracy = min(summary_accuracy, accuracy)

            rpt = confusion(labels, best_class_indices, class_names,
                            use_mlboard and not args.skip_draw_confusion_matrix)
            data = {
                'accuracy': accuracy,
                # 'average_time': '%.3fms' % average_time
            }
            if not args.skip_draw_confusion_matrix:
                data['#documents.confusion_matrix.html'] = rpt
            update_data(data, use_mlboard, mlboard)

            print_fun('Accuracy for %s: %.3f' % (algorithm, accuracy))

        if args.upload_model and summary_accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')
            model_name = 'facenet-classifier'

            if args.device == 'MYRIAD':
                model_name = model_name + "-movidius"

            version = '1.0.0-%s-%s' % (args.driver, timestamp)

            print_fun('Uploading model as %s:%s' % (model_name, version))
            upload_model(
                use_mlboard,
                mlboard,
                classifiers_path,
                model_name,
                version
            )
Example #12
    def __init__(self, bg_remove_path):
        utils.print_fun('Load BG_REMOVE model')
        drv = sdrv.load_driver('tensorflow')
        self.drv = drv()
        self.drv.load_model(bg_remove_path)
Example #13
def process():
    size = 1024
    charset, _ = read_charset()
    global chrset_index
    chrset_index = charset
    names = fuzzyset.FuzzySet()
    names.add('stas khirman')
    names.add('khirman stas')
    names.add('stas')
    names.add('khirman')
    drv1 = driver.load_driver('tensorflow')
    serving1 = drv1()
    serving1.load_model('./m1')
    drv2 = driver.load_driver('tensorflow')
    serving2 = drv2()
    serving2.load_model('./m2')
    global to_process
    i_name = 1
    while runned:
        lock.acquire(blocking=True)
        frame = to_process
        if frame is None:
            lock.release()
            continue
        print('start frame')
        to_process = None
        w = frame.shape[1]
        h = frame.shape[0]
        if w > h:
            if w > size:
                ratio = size / float(w)
                h = int(float(h) * ratio)
                w = size
            else:
                if h > size:
                    ratio = size / float(h)
                    w = int(float(w) * ratio)
                    h = size
        w = fix_length(w, 32)
        h = fix_length(h, 32)
        original = frame[:, :, ::-1].copy()
        image = cv2.resize(original, (w, h))
        image = image.astype(np.float32) / 255.0
        image = np.expand_dims(image, 0)
        outputs = serving1.predict({'image': image})
        cls = outputs['pixel_pos_scores'][0]
        links = outputs['link_pos_scores'][0]
        mask = decodeImageByJoin(cls, links, 0.5, 0.1)
        bboxes = maskToBoxes(mask, (original.shape[1], original.shape[0]))
        found_name = None
        candidates = []
        for i in range(len(bboxes)):
            box = np.int0(cv2.boxPoints(bboxes[i]))
            maxp = np.max(box, axis=0) + 2
            minp = np.min(box, axis=0) - 2

            y1 = max(0, minp[1])
            y2 = min(original.shape[0], maxp[1])
            x1 = max(0, minp[0])
            x2 = min(original.shape[1], maxp[0])
            text_img = original[y1:y2, x1:x2, :]
            if text_img.shape[0] < 4 or text_img.shape[1] < 4:
                continue
            #if bboxes[i][1][0]>bboxes[i][1][1]:
            #    angle = -1*bboxes[i][2]
            #else:
            #    angle = -1*(90+bboxes[i][2])
            #if angle!=0:
            #    text_img = rotate_bound(text_img,angle)
            text_img = norm_image_for_text_prediction(text_img, 32, 320)
            text_img = np.expand_dims(text_img, 0)
            text = serving2.predict({'images': text_img})
            text = text['output'][0]
            text = get_text(text)
            if len(text) > 2:
                print('text: {}'.format(text))
                found = names.get(text)
                if found is not None and len(found) > 0:
                    print(found[0])
                    if found[0][0] > 0.7:
                        text = found[0][1]
                        if ' ' in text:
                            found_name = (found[0][0], text)
                            candidates = []
                            break
                        else:
                            candidates.append(text)
            if found_name is None and len(candidates) > 0:
                found_name = choose_one(names, candidates)
        for i in bboxes:
            box = cv2.boxPoints(i)
            box = np.int0(box)
            original = cv2.drawContours(original, [box], 0, (255, 0, 0), 2)
        frame = np.ascontiguousarray(original[:, :, ::-1], np.uint8)
        if found_name is not None:
            add_overlays(frame, found_name[0], found_name[1])
            cv2.imwrite('results/result_{}.jpg'.format(i_name), frame)
            global result
            result = frame
            i_name += 1
        global last_processed
        last_processed = frame
        lock.release()
        print('stop frame')
Example #14
def main(args):
    # Create a context object. This object owns the
    # handles to all connected realsense devices
    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model(args.model)

    gray = 55
    offset = 500
    back = None

    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_device_from_file(args.input, repeat_playback=True)

    # Configure the pipeline to stream the depth stream
    config.enable_stream(rs.stream.depth)
    config.enable_stream(rs.stream.color)  #, 640, 480, rs.format.rgb8, 30)
    profile = pipeline.start(config)

    # Getting the depth sensor's depth scale (see rs-align example for explanation)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
    print("Depth Scale is: ", depth_scale)

    align_to = rs.stream.color
    align = rs.align(align_to)

    # Create opencv window to render image in
    cv2.namedWindow("Video", cv2.WINDOW_AUTOSIZE)

    use_realsense = False
    while True:
        frames = pipeline.wait_for_frames()

        # Align the depth frame to color frame
        aligned_frames = align.process(frames)

        # Get aligned frames
        depth_frame = aligned_frames.get_depth_frame()  # a 640x480 depth image
        color_frame = aligned_frames.get_color_frame()

        depth_frame = np.asanyarray(depth_frame.get_data())
        # depth_color_image = cv2.applyColorMap(
        #     cv2.convertScaleAbs(depth_frame, alpha=0.08), cv2.COLORMAP_JET
        # )
        color_frame = np.asanyarray(color_frame.get_data())

        color_frame = color_frame[:, :, ::-1]

        if back is None:
            back = np.full([color_frame.shape[0], color_frame.shape[1], 1],
                           gray)

        show_frame = process_frame(serving,
                                   color_frame,
                                   depth_frame,
                                   offset,
                                   back,
                                   use_realsense=use_realsense)

        images = np.vstack((color_frame, show_frame))

        # Render image in opencv window
        cv2.imshow("Video", images)

        key = cv2.waitKey(1)
        # exit the program if Esc is pressed
        if key == 27:
            cv2.destroyAllWindows()
            break
        if key == 32:
            use_realsense = not use_realsense
        if key in {ord('+'), ord('=')}:
            offset += 50
            print(offset)
        if key in {ord('-'), ord('_')}:
            offset -= 50
            print(offset)
Example #15
        # Alpha blend rectangular patches
        img_rect = (1.0 - alpha) * warp_image1 + alpha * warp_image2

        # Copy triangular region of the rectangular patch to the output image
        img_morph[r[1]:r[1] + r[3], r[0]:r[0] + r[2]] = \
            img_morph[r[1]:r[1] + r[3], r[0]:r[0] + r[2]] * (1 - mask) + img_rect * mask
    return img_morph


if __name__ == '__main__':
    args = parse_args()

    input_img = cv2.imread(args.input)
    avg_img = cv2.imread(args.avg)

    drv = driver.load_driver('openvino')
    face_driver = drv()
    face_driver.load_model(args.face_model)

    landmarks_driver = drv()
    landmarks_driver.load_model(args.landmarks_model)

    face_boxes = get_boxes(face_driver, input_img)
    # avg_box = get_boxes(face_driver, avg_img, threshold=0.5)[0]
    # avg_face = crop_by_box(avg_img, avg_box)
    avg_face = avg_img
    face = crop_by_box(input_img, face_boxes[0])

    cv2.namedWindow("Image")
    cv2.imshow("Image", avg_face)
    cv2.waitKey(0)
Example #16
                os.makedirs(args.output, 0o755)
    else:
        inputs = [args.video]

    graph_path = get_graph_path(args.model, models_dir=args.modelsDir)
    logger.debug('initialization %s : %s' % (args.model, graph_path))
    w, h = model_wh(args.resolution)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    e = TfPoseEstimator(
        graph_path,
        target_size=(w, h),
        tf_config=config,
    )

    drv = driver.load_driver("tensorflow")
    d = drv()
    d.load_model(args.modelObjectDetection)

    for inp in inputs:

        logger.info(f"processing video {inp}")
        cap = cv2.VideoCapture(inp)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if args.rotate == "cw" or args.rotate == "ccw":
            width, height = height, width
        video_writer = None
        if args.output:
            if process_dir:
Example #17
def main(args):

    dataset = facenet.get_dataset(args.data_dir)
    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(
            cls.image_paths
        ) > 0, 'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images /
                                           args.batch_size))
    embeddings_size = nrof_images
    emb_array = np.zeros((embeddings_size, 512))
    start_time = time.time()
    for j in range(100):
        for i in range(nrof_batches_per_epoch):
            start_index = i * args.batch_size
            end_index = min((i + 1) * args.batch_size, nrof_images)
            paths_batch = paths[start_index:end_index]
            images = facenet.load_data(paths_batch, False, False,
                                       args.image_size)

            if serving.driver_name == 'tensorflow':
                feed_dict = {'input:0': images, 'phase_train:0': False}
            elif serving.driver_name == 'openvino':
                input_name = list(serving.inputs.keys())[0]

                # Transpose image for channel first format
                images = images.transpose([0, 3, 1, 2])
                feed_dict = {input_name: images}
            else:
                raise RuntimeError('Driver %s currently not supported' %
                                   serving.driver_name)

            outputs = serving.predict(feed_dict)

    end_time = time.time()
    nrof_batches_per_epoch *= 100
    print("Duration: {} sec/sample batch count:{}".format(
        (end_time - start_time) / nrof_batches_per_epoch,
        nrof_batches_per_epoch))
    print("Speed: {} sample/sec batch count:{}".format(
        nrof_batches_per_epoch / (end_time - start_time),
        nrof_batches_per_epoch))
Example #18
def main():
    frame_interval = 2  # Number of frames after which to run face detection
    fps_display_interval = 5  # seconds
    frame_rate = 0
    frame_count = 0
    start_time = time.time()

    parser = get_parser()
    args = parser.parse_args()

    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model('./model')
    if args.camera:
        video_capture = cv2.VideoCapture(args.camera)
    else:
        video_capture = cv2.VideoCapture(0)
    width, height = get_size(args.size)
    back = cv2.imread('./newback.jpg')[:, :, ::-1]
    back = cv2.resize(back, (width, height))
    # back = np.full([height, width, 1], 100)
    back = back.astype(np.float32)
    try:
        while True:
            _, frame = video_capture.read()
            #print("Orginal {}".format(frame.shape))

            frame = imresample(frame, height, width)

            if (frame_count % frame_interval) == 0:
                # BGR -> RGB
                frame = frame[:, :, ::-1]
                frame = frame.astype(np.float32)
                input = cv2.resize(frame, (160, 160))
                input = np.asarray(input, np.float32) / 255.0
                outputs = serving.predict(
                    {'image': np.expand_dims(input, axis=0)})
                mask = outputs['output'][0]
                mask = cv2.resize(mask, (width, height))
                mask = np.expand_dims(mask, 2)
                frame = np.concatenate(
                    [frame, frame * mask + back * (1 - mask)], axis=1)
                #print('rgb_frame {}'.format(rgb_frame.shape))
                #rgb_frame = rgb_frame.astype(np.uint8)
                frame = np.ascontiguousarray(frame[:, :, ::-1], np.uint8)

                # Check our current fps
                end_time = time.time()
                if (end_time - start_time) > fps_display_interval:
                    frame_rate = int(frame_count / (end_time - start_time))
                    start_time = time.time()
                    frame_count = 0

                add_overlays(frame, frame_rate / 2)

                cv2.imshow('Video', frame)
            frame_count += 1
            key = cv2.waitKey(1)
            # Quit on 'q' or Esc
            if key == ord('q') or key == 27:
                break

    except (KeyboardInterrupt, SystemExit) as e:
        print('Caught %s: %s' % (e.__class__.__name__, e))

    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()
    print('Finished')
Example #19
def main(args):
    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model(args.model)

    color_frame = np.load(args.color)
    depth_frame = np.load(args.depth)
    width, height = color_frame.shape[1], color_frame.shape[0]

    gray = 55
    back = np.full([height, width, 1], gray)

    frame = color_frame
    frame = frame.astype(np.float32)
    inputs = cv2.resize(frame, (160, 160))
    inputs = np.asarray(inputs, np.float32) / 255.0
    outputs = serving.predict({'image': np.expand_dims(inputs, axis=0)})

    mask = outputs['output'][0]
    mask = cv2.resize(mask, (width, height))

    mask = np.expand_dims(mask, 2)
    threshold = args.threshold
    use_realsense = False
    while True:
        if use_realsense:
            mask_2d = np.copy(mask).reshape(mask.shape[0], mask.shape[1])
            center = np.round(
                ndimage.center_of_mass(mask_2d)).astype(int)
            x = center[0]
            y = center[1]
            depth = depth_frame[x][y]
            max_depth = depth * threshold
            # Drop pixels which have depth more than foreground * threshold
            mask_2d[depth_frame >= max_depth] = 0

            mask_3d = mask_2d.reshape(mask.shape[0], mask.shape[1], 1)
            show_frame = np.concatenate(
                [frame, frame * mask_3d + back * (1 - mask_3d)], axis=1)
            show_frame = np.ascontiguousarray(show_frame[:, :, ::-1], np.uint8)
        else:
            show_frame = np.concatenate(
                [frame, frame * mask + back * (1 - mask)], axis=1)
            show_frame = np.ascontiguousarray(show_frame[:, :, ::-1], np.uint8)

        cv2.imshow('Video', show_frame)
        key = cv2.waitKey(1)
        # Quit on 'q' or Esc
        if key == ord('q') or key == 27:
            break
        if key == 32:
            use_realsense = not use_realsense
        if key in {ord('+'), ord('=')}:
            threshold += 0.025
            print(threshold)
        if key in {ord('-'), ord('_')}:
            threshold -= 0.025
            print(threshold)

    cv2.destroyAllWindows()
    print('Finished')
Example #20
def main(args):
    dataset = facenet.get_dataset(args.data_dir)
    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(cls.image_paths) > 0, 'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver
    serving = drv(
        preprocess=serving_hook.preprocess,
        postprocess=serving_hook.postprocess,
        init_hook=serving_hook.init_hook,
        classifier=args.classifier,
        use_tf='False',
        use_face_detection='True',
        face_detection_path=args.face_detection_path
    )
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    time_requests = 0.0
    epochs = 2
    start_time = time.time()
    for j in range(epochs):
        for path in paths:
            print('Processing %s...' % path)
            with open(path, 'rb') as f:
                data = f.read()

            t = time.time()

            feed_dict = {'input': np.array(data)}
            outputs = serving.predict_hooks(feed_dict, context=Context())

            delta = (time.time() - t) * 1000
            time_requests += delta

    duration = float(time.time() - start_time)
    print()
    print('Total time: %.3fs' % duration)
    per_request_ms = float(time_requests) / epochs / len(paths)
    print('Time per request: %.3fms' % per_request_ms)

    speed = 1 / (per_request_ms / 1000)
    print('Speed: {} sample/sec'.format(speed))
Example #21
from ml_serving.drivers import driver
import cv2
import os
from scipy import ndimage
import numpy as np
import json
import shutil
import logging
import argparse

LOG = logging.getLogger(__name__)

face_drv = driver.load_driver("model")()

mat_drv = driver.load_driver("model")()

face_input_name = ''
face_input_shape = None
face_output_name = ''

g_mean = np.array(([126.88, 120.24, 112.19])).reshape([1, 1, 3])
unknown_code = 128


def generate_trimap(alpha):
    trimap = np.copy(alpha)
    k_size = 20
    trimap[np.where(
        (ndimage.grey_dilation(alpha[:, :], size=(k_size, k_size)) -
         ndimage.grey_erosion(alpha[:, :], size=(k_size, k_size))) != 0
    )] = unknown_code
    return trimap
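
A hypothetical usage sketch for generate_trimap with a synthetic alpha mask: the dilation-erosion difference marks a band (about k_size pixels wide) around the matte edge as unknown_code (128), leaving pure background and foreground untouched:

alpha = np.zeros((240, 320), np.float32)
alpha[60:180, 80:240] = 255      # synthetic foreground rectangle
trimap = generate_trimap(alpha)  # 0 = background, 255 = foreground, 128 = unknown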