Example #1
def main():
    ml = client.Client()

    current_task_name = os.environ.get('TASK_NAME')
    LOG.info("Current task name = %s" % current_task_name)

    app = ml.apps.get()

    for task in app.tasks:
        if task.name == current_task_name:
            continue

        LOG.info("Start task %s..." % task.name)
        started = task.start()

        LOG.info("Run & wait [name=%s, build=%s, status=%s]" %
                 (started.name, started.build, started.status))
        completed = started.wait()

        if completed.status != SUCCEEDED:
            LOG.warning("Task %s-%s completed with status %s." %
                        (completed.name, completed.build, completed.status))
            LOG.warning("Workflow is completed with status ERROR")
            sys.exit(1)
        LOG.info("Task %s-%s completed with status %s." %
                 (completed.name, completed.build, completed.status))

    LOG.info("Workflow is completed with status SUCCESS")
Example #2
def main():

    m = client.Client()

    args = parse_args()
    data_dir = path.join(args.train_dir, 'mnist_sample')
    data_path = untar_data(URLs.MNIST_SAMPLE,
                           fname=tempfile.mktemp(),
                           dest=data_dir)
    print('Using path %s' % data_path)
    m.update_task_info({'data_path': str(data_path)})

    data = ImageDataBunch.from_folder(data_path,
                                      ds_tfms=(rand_pad(2, 28), []),
                                      bs=64)
    data.normalize(imagenet_stats)

    learn = create_cnn(data, models.resnet18, metrics=accuracy)
    learn.fit_one_cycle(1, 0.01)
    acc = accuracy(*learn.get_preds())
    print('Accuracy %s' % str(acc))
    m.update_task_info({'accuracy': str(acc)})

    model_location = path.join(args.train_dir, "model")
    model_location = learn.save(model_location, return_path=True)
    print('Model saved to %s' % model_location)
    m.update_task_info({'model_location': str(model_location)})

    print('Network structure:')
    print(learn.model.eval())
Example #3
def main(_):
    img_dir = '/tmp/mnist'
    logging.info("Extracting images to %s ...", img_dir)
    mnist = input_data.read_data_sets(FLAGS.data_dir,
                                      one_hot=True,
                                      source_url=FLAGS.source_url)

    os.mkdir(img_dir)
    for i in range(mnist.test.num_examples):
        batch = mnist.test.next_batch(1)
        image = np.reshape(batch[0][0], (28, 28)) * 255
        label = int(np.argmax(batch[1][0]))  # index of the one-hot label
        filename = '%s/%d-%d.png' % (img_dir, i, label)
        scipy.misc.imsave(filename, image)
    logging.info("Pushing dataset to %s:%s ...", FLAGS.catalog_name,
                 FLAGS.version)
    kl = client.Client()
    kl.datasets.push(os.environ.get('WORKSPACE_NAME'),
                     FLAGS.catalog_name,
                     FLAGS.version,
                     img_dir,
                     create=True)
    logging.info("Push success")
Example #4
def main():
    ml = client.Client()

    current_task_name = os.environ.get('TASK_NAME')
    LOG.info("Current task name = %s" % current_task_name)

    current_project = os.environ['PROJECT_NAME']
    current_workspace = os.environ['WORKSPACE_ID']

    LOG.info("Current project = %s" % current_project)
    LOG.info("Current workspace = %s" % current_workspace)

    current_app_id = current_workspace + '-' + current_project
    app = ml.apps.get(current_app_id)

    for task in app.tasks:
        if task.name == current_task_name:
            continue

        LOG.info("Start task %s..." % task.name)
        started = task.start()

        LOG.info("Run & wait [name=%s, build=%s, status=%s]" %
                 (started.name, started.build, started.status))
        completed = started.wait()

        if completed.status != SUCCEEDED:
            LOG.warning("Task %s-%s completed with status %s." %
                        (completed.name, completed.build, completed.status))
            LOG.warning("Workflow is completed with status ERROR")
            sys.exit(1)
        LOG.info("Task %s-%s completed with status %s." %
                 (completed.name, completed.build, completed.status))

    LOG.info("Workflow is completed with status SUCCESS")
Example #5
def export(checkpoint_dir, task_name, build_id, catalog_name):
    featureX = tf.contrib.layers.real_valued_column('x')
    featureY = tf.contrib.layers.real_valued_column('y')
    model = tf.contrib.learn.SVM(example_id_column='i',
                                 feature_columns=[featureX, featureY],
                                 model_dir=checkpoint_dir)
    feature_spec = {
        'x': tf.FixedLenFeature(dtype=tf.float32, shape=[1]),
        'y': tf.FixedLenFeature(dtype=tf.float32, shape=[1])
    }
    serving_fn = tf.contrib.learn.utils.input_fn_utils.build_parsing_serving_input_fn(
        feature_spec)
    export_path = model.export_savedmodel(export_dir_base=checkpoint_dir,
                                          serving_input_fn=serving_fn)
    export_path = export_path.decode("utf-8")
    logging.info("\nModel Path: %s", export_path)
    client.update_task_info({'model_path': export_path},
                            task_name=task_name,
                            build_id=build_id)
    client.update_task_info({
        'model_path': export_path,
        'checkpoint_path': checkpoint_dir
    })
    if catalog_name is not None:
        ml = client.Client()
        ml.model_upload(catalog_name, '1.0.' + build_id, export_path)
Example #6
    def __init__(self,
                 checkpoint_dir,
                 tensors=None,
                 submit_summary=True,
                 max_number_images=1,
                 every_steps=None,
                 every_n_secs=60):
        if tensors is None:
            tensors = {}
        if every_steps is not None:
            every_n_secs = None
        self._timer = tf.train.SecondOrStepTimer(every_steps=every_steps,
                                                 every_secs=every_n_secs)
        if submit_summary:
            self._rpt = Report(checkpoint_dir,
                               max_number_images=max_number_images)
        else:
            self._rpt = None
        try:
            from mlboardclient.api import client
        except ImportError:
            tf.logging.warning("Can't find mlboardclient.api")
            client = None
        mlboard = None
        if client:
            mlboard = client.Client()
            try:
                mlboard.apps.get()
            except Exception:
                tf.logging.warning("Can't init mlboard env")
                mlboard = None

        self._mlboard = mlboard
        self._tensors = tensors
        self._most_recent_step = 0
Example #7
def export(checkpoint_dir, params):
    m = client.Client()
    base_id = '0'
    if os.environ.get('BASE_TASK_BUILD_ID', '') != '':
        app = m.apps.get()
        base_id = os.environ['BASE_TASK_BUILD_ID']
        task_name = os.environ['BASE_TASK_NAME']
        task = app.get_task(task_name, base_id)
        checkpoint_dir = task.exec_info['checkpoint_path']
        params['num_chans'] = task.exec_info['num-chans']
        params['num_pools'] = task.exec_info['num-pools']
        params['resolution'] = task.exec_info['resolution']
        params['checkpoint'] = checkpoint_dir

    conf = tf.estimator.RunConfig(
        model_dir=checkpoint_dir,
    )
    params['batch_size'] = 1
    # 'features' comes from the base task when one is configured above;
    # otherwise fall back to the default '3' to avoid referencing an undefined task.
    features_str = task.exec_info.get('features', '3') if base_id != '0' else '3'
    features_def = [int(i) for i in features_str.split(',')]
    logging.info('Features Def: {}'.format(features_def))
    params['features'] = features_def
    feature_placeholders = {
        'image': tf.placeholder(tf.float32, [1, None, None, sum(features_def)], name='image'),
    }
    receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_placeholders)
    net = FastBGNet(
        params=params,
        model_dir=checkpoint_dir,
        config=conf,
    )
    models = os.path.join(checkpoint_dir, 'models')
    build_id = os.environ['BUILD_ID']
    export_dir = os.path.join(models, build_id)
    os.makedirs(export_dir, exist_ok=True)
    export_path = net.export_savedmodel(
        export_dir,
        receiver,
    )
    export_path = export_path.decode("utf-8")
    base = os.path.basename(export_path)
    driver_data = {'driver': 'tensorflow', 'path': base}
    with open(os.path.join(export_dir, '_model_config.yaml'), 'w') as f:
        yaml.dump(driver_data, f)
    version = f'1.{base_id}.{build_id}'
    model_name = 'person-mask'
    m.model_upload(model_name, version, export_dir, spec=serving_spec())
    client.update_task_info({'model_path': export_path, 'num-chans': params['num_chans'],
                             'features':','.join([str(i) for i in features_def]),
                             'num-pools': params['num_pools'], 'resolution': params['resolution'],
                             'model_reference': catalog_ref(model_name, 'mlmodel', version)})
Example #8
def main(_):
    if FLAGS.mode == 'export':
        log_dir = os.environ.get('TRAINING_DIR') + '/' + FLAGS.build
        client.update_task_info({'checkpoint_path': log_dir})
        export_path = os.path.join(log_dir, str(FLAGS.model_version))
        export(export_path, log_dir)
        client.update_task_info({'model_path': export_path}, task_name='train', build_id=FLAGS.build)
        if FLAGS.catalog_name is not None:
            ml = client.Client()
            ml.model_upload(FLAGS.catalog_name, '1.0.' + FLAGS.build, export_path)
    else:
        train()
Example #9
def push_model(target, dirname):
    if os.environ.get('PROJECT_ID', None):
        from mlboardclient.api import client
        timestamp = datetime.datetime.now().strftime('%s')
        if target is not None:
            version = '1.0.0-openvino-{}-{}'.format(target, timestamp)
        else:
            version = '1.0.0-openvino-{}'.format(timestamp)
        mlboard = client.Client()
        mlboard.model_upload('facenet', version, dirname)
        submit({'model': catalog_ref('facenet', 'mlmodel', version)})
        logging.info(
            "New model uploaded as 'facenet', version '{}'.".format(version))
Example #10
def push(name, dirname):
    if os.environ.get('PROJECT_ID', None):
        from mlboardclient.api import client
        timestamp = datetime.datetime.now().strftime('%s')
        if name is not None:
            version = '1.0.0-{}-{}'.format(name, timestamp)
        else:
            version = '1.0.0-{}'.format(timestamp)
        mlboard = client.Client()
        mlboard.model_upload('openvino-facenet', version, dirname)
        submit({'model': '{}:{}'.format('openvino-facenet', version)})
        logging.info(
            "New model uploaded as 'openvino-facenet', version '%s'." %
            (version))
Example #11
def get():
    if client is None:
        return None
    global mlboard, mlboard_tried
    if not mlboard_tried:
        mlboard_tried = True
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            logging.info('Do not use mlboard.')
        else:
            logging.info('Use mlboard parameters logging.')
    return mlboard
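# Assumed module-level state for the get() helper above. It is not part of the
# original snippet; the guarded import mirrors the pattern used in Examples #6 and #30.
import logging

try:
    from mlboardclient.api import client
except ImportError:
    client = None

mlboard = None
mlboard_tried = False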
Example #12
def main(_):
    data_dir = "/tmp/mnist"
    logging.info("Uploading dataset to %s ...", data_dir)
    input_data.read_data_sets(data_dir,
                              one_hot=True,
                              source_url=FLAGS.source_url)
    logging.info("Upload success")
    logging.info("Pushing dataset to %s:%s ...", FLAGS.catalog_name,
                 FLAGS.version)
    kl = client.Client()
    kl.datasets.push(os.environ.get('WORKSPACE_NAME'),
                     FLAGS.catalog_name,
                     FLAGS.version,
                     data_dir,
                     create=True)
    logging.info("Push success")
Example #13
def after_export(training_dir, train_build_id, model_name, model_version):

    m = client.Client()
    m.model_upload(
        model_name,
        model_version,
        '{}/model/{}/saved_model'.format(training_dir, train_build_id),
    )
    m.update_task_info({
        'model':
        '#/%s/catalog/mlmodel/%s/versions/%s' % (
            os.environ['WORKSPACE_NAME'],
            model_name,
            model_version,
        ),
    })
Example #14
def main():
    parser = get_parser()
    args = parser.parse_args()

    if args.debug:
        logging.root.setLevel('DEBUG')

    m = client.Client()
    app = m.apps.get()
    task = app.task('train')
    if args.data_version != '':
        task.config['datasetRevisions'] = [{'volumeName': 'data',
                                            'revision': args.data_version}]
    task.resource('worker')['command'] = ('python svm.py --steps=1000 '
                                          '--checkpoint_dir=$TRAINING_DIR/$BUILD_ID')
    spec = (optimizator.ParamSpecBuilder()
            .resource('worker')
            .param('l2_regularization').bounds(1, 10)
            .param('l1_regularization').bounds(1, 10)
            .build())
    LOG.info('Run with param spec = %s', spec)
    result = task.optimize(
        'accuracy',
        spec,
        init_steps=args.init_steps,
        iterations=args.iterations,
        method=args.method,
        max_parallel=args.parallel,
        direction='maximize'
    )
    best = result['best']
    LOG.info('Found best build %s:%s: %.2f',
             best.name, best.build, best.exec_info['accuracy'])
    client.update_task_info({'checkpoint_path': best.exec_info['checkpoint_path'],
                             'accuracy': best.exec_info['accuracy'],
                             'build': best.build})
    LOG.info('Exporting model %s...', best.build)
    export = app.task('export')
    export.resource('run')['command'] = 'python svm.py --export_model'
    export.resource('run')['args'] = {
        'catalog_name': 'my_svm_model',
        'task_name': best.name,
        'build_id': best.build,
        'checkpoint_dir': best.exec_info['checkpoint_path']
    }
    export.start()
    export.wait()
    client.update_task_info({'model_path': export.exec_info['model_path']})
Example #15
def push_dataset(target, dirname):
    if os.environ.get('PROJECT_ID', None):
        from mlboardclient.api import client
        timestamp = datetime.datetime.now().strftime('%s')
        if target is not None:
            version = '1.0.0-openvino-{}-{}'.format(target, timestamp)
        else:
            version = '1.0.0-openvino-{}'.format(timestamp)
        mlboard = client.Client()
        mlboard.datasets.push(os.environ.get('WORKSPACE_NAME'),
                              'facenet-pretrained',
                              version,
                              dirname,
                              create=True)
        submit(
            {'model': catalog_ref('facenet-pretrained', 'dataset', version)})
        logging.info("New model uploaded as 'facenet-pretrained', "
                     "version '{}'.".format(version))
Example #16
def main():
    build_config()
    parser = ArgumentParser()
    parser.add_argument('--research_dir')
    parser.add_argument('--training_dir')
    parser.add_argument('--model_name', default="object-detection")
    parser.add_argument('--model_version', default="1.0.0")
    parser.add_argument('--train_build_id')
    parser.add_argument('--train_checkpoint')
    args, _ = parser.parse_known_args()

    targs = sys.argv[:]

    targs[0] = args.research_dir + '/object_detection/export_inference_graph.py'
    targs.insert(0, sys.executable or 'python')

    targs.append("--pipeline_config_path")
    targs.append("faster_rcnn.config")

    targs.append("--trained_checkpoint_prefix")
    targs.append("%s/%s/model.ckpt-%s" % (args.training_dir, args.train_build_id, args.train_checkpoint))

    targs.append("--output_directory")
    targs.append("%s/model/%s" % (args.training_dir, args.train_build_id))

    targs.append("--input_type")
    targs.append("encoded_image_string_tensor")

    print("Execute: ", targs)
    call(targs)

    m = client.Client()
    m.model_upload(
        args.model_name,
        args.model_version,
        '%s/model/%s/saved_model' % (args.training_dir, args.train_build_id),
    )
    m.update_task_info({
        'model': '#/%s/catalog/mlmodel/%s/versions/%s' % (
            os.environ['WORKSPACE_NAME'],
            args.model_name,
            args.model_version,
        ),
    })
Example #17
def main():
    parser = get_parser()
    args = parser.parse_args()

    if args.debug:
        logging.root.setLevel('DEBUG')

    m = client.Client()
    if args.task != '':
        app = m.apps.get()
        serving = app.servings[0]
        srv = serving.start(args.task, args.build)
        LOG.info("Start serving")
        name = (os.environ.get('PROJECT_NAME') + '-' + srv.config['name'] +
                '-' + args.task + '-' + args.build)
        LOG.info("Waiting serving ready...")
        status = 0
        for i in range(6):
            time.sleep(10)
            resp = m.servings.call(
                name,
                'svm',
                {"features": [{"x": {"Float": 0}, "y": {"Float": 0}}]},
                port='9000')
            status = resp.status_code
            if status == 200:
                break
        if status != 200:
            srv.stop()
            raise RuntimeError('Failed start serving')
        res = validate(m, name)
        srv.stop()
        if res != '':
            raise RuntimeError(res)
    else:
        res = validate(m, args.model)
        if res != '':
            raise RuntimeError(res)
Example #18
def main():
    parser = get_parser()
    args = parser.parse_args()

    if args.debug:
        logging.root.setLevel('DEBUG')

    ml = client.Client()

    app = ml.apps.get()

    def _task(i, task):
        step_count = 100 + i * 300
        model = 'svm-s_%d-l1_%d-l2_%d' % (step_count, 1, 10)
        task.comment = model
        task.resource('worker')['args'] = {
            'l2_regularization': 10,
            'l1_regularization': 1,
            'steps': step_count,
            'checkpoint_dir': '%s/%s-%s' % (os.environ.get('TRAINING_DIR'),
                                            os.environ.get('BUILD_ID'), model)
        }

    def jobs(num):
        for i in range(num):
            yield partial(_task, i)

    task = app.task('train')
    results = task.parallel_run(2, jobs(args.runs))
    best_accuracy = 0
    best = None
    for r in results:
        if r.exec_info['accuracy'] > best_accuracy:
            best_accuracy = r.exec_info['accuracy']
            best = r
    LOG.info('BEST %s with accuracy=%.2f', best.exec_info['model_path'],
             best_accuracy)
Example #19
def main():

    build_config()

    with open('faster_rcnn.config', 'r') as cf:
        data = cf.read()
        config_html = '<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{}</pre></body></html>'.format(
            data)
    client.Client().update_task_info({'#documents.config.html': config_html})

    parser = ArgumentParser()
    parser.add_argument('--training_dir')
    parser.add_argument('--research_dir')
    parser.add_argument('--model_name', default="object-detection")
    parser.add_argument('--model_version', default="1.0.0")
    parser.add_argument('--build_id')
    parser.add_argument('--num_steps')
    args, _ = parser.parse_known_args()

    export_subprocess(args.research_dir, args.training_dir, args.build_id,
                      args.num_steps, args.model_name, args.model_version)
Example #20
def main():
    build_config()
    parser = ArgumentParser()
    parser.add_argument('--training_dir')
    parser.add_argument('--research_dir')
    parser.add_argument('--build_id')
    parser.add_argument('--num_steps', default=1000)
    parser.add_argument('--only_train', default='False')
    args, _ = parser.parse_known_args()

    targs = sys.argv[:]
    targs[0] = args.research_dir + '/object_detection/model_main.py'
    targs.insert(0, sys.executable or 'python')
    targs.append("--pipeline_config_path")
    targs.append("faster_rcnn.config")
    targs.append("--model_dir")
    targs.append("%s/%s" % (args.training_dir, args.build_id))

    call(targs)

    client.Client().update_task_info({'train_build_id': args.build_id})
Example #21
def main():
    parser = get_parser()
    args = parser.parse_args()

    if args.debug:
        logging.root.setLevel('DEBUG')

    m = client.Client()
    app = m.apps.get()
    task = app.task('train')
    task.resource('worker')['command'] = (
        'python mnist.py --training_iteration=1000 --model_version 0')
    spec = (optimizator.ParamSpecBuilder()
            .resource('worker')
            .param('fully_neurons').int().bounds(1, 3)
            .param('drop_out').bounds(0.2, 0.7)
            .build())
    LOG.info('Run with param spec = %s', spec)
    result = task.optimize('test_accuracy',
                           spec,
                           init_steps=args.init_steps,
                           iterations=args.iterations,
                           method=args.method,
                           max_parallel=args.parallel,
                           direction='maximize')
    best = result['best']
    LOG.info('Found best build %s:%s: %.2f', best.name, best.build,
             best.exec_info['test_accuracy'])
    LOG.info('Exporting model to catalog mnist/%s', best.build)
    export = app.task('export')
    export.resource('run')['command'] = 'python mnist.py'
    export.resource('run')['args'] = {
        'mode': 'export',
        'catalog_name': 'mnist',
        'build': best.build,
        'model_version': 1
    }
    export.start()
    export.wait()
Example #22
def main():
    parser = get_parser()
    args = parser.parse_args()
    dataset = '/tmp/data'
    if not os.path.exists(dataset):
        os.makedirs(dataset)
    train = pd.DataFrame(random.randint(low=0, high=100, size=(1000, 2)),
                         columns=['x', 'y'])
    train['label'] = train.apply(lambda v: 0 if v['x'] > v['y'] +
                                 (5 - random.random_sample() * 10) else 1,
                                 axis=1)
    test = pd.DataFrame(random.randint(low=0, high=100, size=(100, 2)),
                        columns=['x', 'y'])
    test['label'] = test.apply(lambda v: 0 if v['x'] > v['y'] else 1, axis=1)
    train.to_csv(dataset + '/train.csv')
    test.to_csv(dataset + '/test.csv')
    kl = client.Client()
    kl.datasets.push(os.environ.get('WORKSPACE_NAME'),
                     args.dataset,
                     args.version,
                     dataset,
                     create=True)
    client.update_task_info(
        {'dataset': '%s:%s' % (args.dataset, args.version)})
Example #23
def main(args):
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print('Do not use mlboard.')
        else:
            print('Use mlboard parameters logging.')
            use_mlboard = True

    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=args.seed)

            if args.use_split_dataset:
                dataset_tmp = facenet.get_dataset(args.data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, args.min_nrof_images_per_class,
                    args.nrof_train_images_per_class)
                if args.mode == 'TRAIN':
                    dataset = train_set
                elif args.mode == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(args.data_dir)

            update_data({'mode': args.mode}, use_mlboard, mlboard)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(
                    cls.image_paths
                ) > 0, 'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))
            data = {
                'num_classes': len(dataset),
                'num_images': len(paths),
                'model_path': args.model,
                'image_size': args.image_size,
                'data_dir': args.data_dir,
                'batch_size': args.batch_size,
            }
            update_data(data, use_mlboard, mlboard)

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(args.model)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / args.batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * args.batch_size
                end_index = min((i + 1) * args.batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                for j in range(end_index - start_index):
                    print('Batch {} <-> {}'.format(paths_batch[j],
                                                   labels[start_index + j]))
                images = facenet.load_data(paths_batch, False, False,
                                           args.image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(
                args.classifier_filename)

            if args.mode == 'TRAIN':
                # Train classifier
                print('Training classifier')
                model = svm.SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]
                print('Classes:')
                print(class_names)

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile, protocol=2)
                print('Saved classifier model to file "%s"' %
                      classifier_filename_exp)

            elif args.mode == 'CLASSIFY':
                # Classify images
                print('Testing classifier')
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                print('Loaded classifier model from file "%s"' %
                      classifier_filename_exp)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]

                for i in range(len(best_class_indices)):
                    print('%4d  %s: %.3f' %
                          (i, class_names[best_class_indices[i]],
                           best_class_probabilities[i]))

                accuracy = np.mean(np.equal(best_class_indices, labels))
                update_data({'accuracy': accuracy}, use_mlboard, mlboard)
                print('Accuracy: %.3f' % accuracy)

                if args.upload_model and accuracy >= args.upload_threshold:
                    timestamp = datetime.datetime.now().strftime('%s')

                    upload_model(use_mlboard, mlboard, classifier_filename_exp,
                                 'facenet-classifier', '1.0.0-%s' % timestamp)
Example #24
##
#  Runs train -> eval -> export.
##
import argparse
import logging
import re, sys

from mlboardclient.api import client

SUCCEEDED = 'Succeeded'

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
mlboard = client.Client()
run_tasks = [
    'train',
    'export',
]


def override_task_arguments(task, params):
    for k, v in params.items():
        pattern = re.compile(r'--{}[ =]([^\s]+|[\'"].*?[\'"])'.format(k))
        resource = task.config['resources'][0]
        task_cmd = resource['command']
        replacement = '--{} {}'.format(k, v)
        if pattern.findall(task_cmd):
            # Replace the existing value of the argument in the task command
            resource['command'] = pattern.sub(replacement, task_cmd)
        else:
            # Add the argument to the end of the command (the snippet is
            # truncated here on the source page; this line is an assumed completion)
            resource['command'] = '{} {}'.format(task_cmd, replacement)
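# The snippet above is truncated on the source page. Below is a minimal,
# assumed sketch (not the original code) of how the tasks listed in run_tasks
# could be run in sequence, using the task.start()/wait() API shown in
# Examples #1 and #4.
def run_workflow():
    app = mlboard.apps.get()
    for task_name in run_tasks:
        task = app.task(task_name)
        LOG.info('Starting task %s...', task_name)
        completed = task.start().wait()
        if completed.status != SUCCEEDED:
            LOG.error('Task %s-%s failed with status %s',
                      completed.name, completed.build, completed.status)
            sys.exit(1)
        LOG.info('Task %s-%s completed with status %s',
                 completed.name, completed.build, completed.status)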
Example #25
def main(args):
    algorithms = ["kNN", "SVM"]

    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print_fun('Do not use mlboard.')
        else:
            print_fun('Use mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        if len(cls.image_paths) == 0:
            print_fun('WARNING: %s: There are no aligned images in this class.' % cls)

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print_fun('Number of classes: %d' % len(dataset))
    print_fun('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print_fun('Loading feature extraction model')

    # Load and instantiate the driver
    drv = driver.load_driver(args.driver)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print_fun('Calculating features for images')

    noise_count = max(0, args.noise_count) if args.noise else 0
    emb_args = {
        'model': args.model,
        'use_split_dataset': args.use_split_dataset,
        'noise': noise_count > 0,
        'noise_count': noise_count,
        'flip': args.flip,
        'image_size': args.image_size,
        'min_nrof_images_per_class': args.min_nrof_images_per_class,
        'nrof_train_images_per_class': args.nrof_train_images_per_class,
    }

    stored_embeddings = {}
    if args.mode == 'TRAIN':
        embeddings_filename = os.path.join(
            args.data_dir,
            "embeddings-%s.pkl" % hashlib.md5(json.dumps(emb_args, sort_keys=True).encode()).hexdigest(),
        )
        if os.path.isfile(embeddings_filename):
            print_fun("Found stored embeddings data, loading...")
            with open(embeddings_filename, 'rb') as embeddings_file:
                stored_embeddings = pickle.load(embeddings_file)

    total_time = 0.

    nrof_images = len(paths)

    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / args.batch_size))
    epp = embeddings_per_path(noise_count, args.flip)
    embeddings_size = nrof_images * epp

    emb_array = np.zeros((embeddings_size, 512))
    fit_labels = []

    emb_index = 0
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        labels_batch = labels[start_index:end_index]

        # has_not_stored_embeddings = False
        paths_batch_load, labels_batch_load = [], []

        for j in range(end_index - start_index):
            # print_fun(os.path.split(paths_batch[j]))
            cls_name = dataset[labels_batch[j]].name
            cached = True
            if cls_name not in stored_embeddings or paths_batch[j] not in stored_embeddings[cls_name]:
                # has_not_stored_embeddings = True
                cached = False
                paths_batch_load.append(paths_batch[j])
                labels_batch_load.append(labels_batch[j])
            else:
                embeddings = stored_embeddings[cls_name][paths_batch[j]]
                emb_array[emb_index:emb_index + len(embeddings), :] = stored_embeddings[cls_name][paths_batch[j]]
                fit_labels.extend([labels_batch[j]] * len(embeddings))
                emb_index += len(embeddings)

            print_fun('Batch {} <-> {} {} {}'.format(
                paths_batch[j], labels_batch[j], cls_name, "cached" if cached else "",
            ))

        if len(paths_batch_load) == 0:
            continue

        images = load_data(paths_batch_load, labels_batch_load, args.image_size, noise_count, args.flip)

        if serving.driver_name == 'tensorflow':
            feed_dict = {'input:0': images, 'phase_train:0': False}
        elif serving.driver_name == 'openvino':
            input_name = list(serving.inputs.keys())[0]
            # Transpose image for channel first format
            images = images.transpose([0, 3, 1, 2])
            feed_dict = {input_name: images}
        else:
            raise RuntimeError('Driver %s currently not supported' % serving.driver_name)

        t = time.time()
        outputs = serving.predict(feed_dict)
        total_time += time.time() - t

        emb_outputs = list(outputs.values())[0]

        if args.mode == "TRAIN":
            for n, e in enumerate(emb_outputs):
                cls_name = dataset[labels_batch_load[n]].name
                if cls_name not in stored_embeddings:
                    stored_embeddings[cls_name] = {}
                path = paths_batch_load[n]
                if path not in stored_embeddings[cls_name]:
                    stored_embeddings[cls_name][path] = []
                stored_embeddings[cls_name][path].append(e)

        emb_array[emb_index:emb_index + len(images), :] = emb_outputs
        fit_labels.extend(labels_batch_load)

        emb_index += len(images)

    # average_time = total_time / embeddings_size * 1000
    # print_fun('Average time: %.3fms' % average_time)

    classifiers_path = os.path.expanduser(args.classifiers_path)

    if args.mode == 'TRAIN':

        # Save embeddings
        with open(embeddings_filename, 'wb') as embeddings_file:
            pickle.dump(stored_embeddings, embeddings_file, protocol=2)

        # Clear (or create) classifiers directory
        try:
            shutil.rmtree(classifiers_path, ignore_errors=True)
        except:
            pass
        os.makedirs(classifiers_path)

        # Create a list of class names
        dataset_class_names = [cls.name for cls in dataset]
        class_names = [cls.replace('_', ' ') for cls in dataset_class_names]
        print_fun('Classes:')
        print_fun(class_names)

        class_stats = [{} for _ in range(len(dataset_class_names))]
        for cls in stored_embeddings:
            class_stats[dataset_class_names.index(cls)] = {
                'images': len(stored_embeddings[cls]),
                'embeddings': sum(len(e) for e in stored_embeddings[cls].values()),
            }

        # Train classifiers
        for algorithm in algorithms:
            if args.only_algorithm is not None and algorithm != args.only_algorithm:
                continue

            print_fun('Classifier algorithm %s' % algorithm)
            # update_data({'classifier_algorithm': args.algorithm}, use_mlboard, mlboard)
            if algorithm == 'SVM':
                model = svm.SVC(kernel='linear', probability=True)
            elif algorithm == 'kNN':
                # n_neighbors = int(round(np.sqrt(len(emb_array))))
                model = neighbors.KNeighborsClassifier(n_neighbors=args.knn_neighbors, weights='distance')
            else:
                raise RuntimeError("Classifier algorithm %s not supported" % algorithm)

            model.fit(emb_array, fit_labels)

            # Saving classifier model
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'wb') as outfile:
                pickle.dump((model, class_names, class_stats), outfile, protocol=2)
            print_fun('Saved classifier model to file "%s"' % classifier_filename)
            # update_data({'average_time_%s': '%.3fms' % average_time}, use_mlboard, mlboard)

    elif args.mode == 'CLASSIFY':

        summary_accuracy = 1

        # Classify images
        for algorithm in algorithms:
            print_fun('Testing classifier %s' % algorithm)
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'rb') as infile:
                (model, class_names, class_stats) = pickle.load(infile)

            print_fun('Loaded classifier model from file "%s"' % classifier_filename)

            predictions = model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            if isinstance(model, neighbors.KNeighborsClassifier):
                param_name = 'distance'
                # clf_name = "knn"
                (closest_distances, _) = model.kneighbors(emb_array)
                eval_values = closest_distances[:, 0]
            elif isinstance(model, svm.SVC):
                param_name = 'probability'
                # clf_name = "svm"
                eval_values = predictions[np.arange(len(best_class_indices)), best_class_indices]
            else:
                raise RuntimeError("Unsupported classifier type: %s" % type(model))

            for i in range(len(best_class_indices)):
                predicted = best_class_indices[i]
                if predicted == labels[i]:
                    print_fun('%4d  %s: %s %.3f' % (
                        i, class_names[predicted], param_name, eval_values[i],
                    ))
                else:
                    print_fun('%4d  %s: %s %.3f, WRONG! Should be %s.' % (
                        i, class_names[predicted], param_name,
                        eval_values[i], class_names[labels[i]]))

            accuracy = np.mean(np.equal(best_class_indices, labels))
            summary_accuracy = min(summary_accuracy, accuracy)

            rpt = confusion(labels, best_class_indices, class_names,
                            use_mlboard and not args.skip_draw_confusion_matrix)
            data = {
                'accuracy': accuracy,
                # 'average_time': '%.3fms' % average_time
            }
            if not args.skip_draw_confusion_matrix:
                data['#documents.confusion_matrix.html'] = rpt
            update_data(data, use_mlboard, mlboard)

            print_fun('Accuracy for %s: %.3f' % (algorithm, accuracy))

        if args.upload_model and summary_accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')
            model_name = 'facenet-classifier'

            if args.device == 'MYRIAD':
                model_name = model_name + "-movidius"

            version = '1.0.0-%s-%s' % (args.driver, timestamp)

            print_fun('Uploading model as %s:%s' % (model_name, version))
            upload_model(
                use_mlboard,
                mlboard,
                classifiers_path,
                model_name,
                version
            )
Example #26
def main(args):
    # Get the paths for the corresponding images
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            utils.print_fun('Do not use mlboard.')
        else:
            utils.print_fun('Use mlboard parameters logging.')
            use_mlboard = True

    image_size = args.image_size
    driver_name = 'openvino'
    if os.path.isdir(args.model) and os.path.exists(
            os.path.join(args.model, 'saved_model.pb')):
        driver_name = 'tensorflow'
        image_size = 112

    data = {
        'image_size': image_size,
        'driver_name': driver_name,
        'model_path': args.model,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    img_paths, actual_issame = load_dataset(args.data_dir)
    drv = driver.load_driver(driver_name)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device='CPU',
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    utils.print_fun('Running forward pass on dataset images')

    # Enqueue one epoch of image paths and labels
    nrof_images = len(img_paths)

    data = {
        'num_images': nrof_images,
        'num_classes': nrof_images // 4,
    }
    update_data(data, use_mlboard, mlboard)

    embedding_size = list(serving.outputs.values())[0][-1]
    nrof_batches = int(np.ceil(float(nrof_images) / args.batch_size))
    emb_array = np.zeros((nrof_images, embedding_size))

    # TODO(nmakhotkin): cache embeddings by image paths (because image pairs
    #  are duplicated and no need to do inference on them)
    for i in range(nrof_batches):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = img_paths[start_index:end_index]
        probe_imgs = dataset.load_data(paths_batch,
                                       image_size,
                                       normalization=args.normalization)
        emb = _predict(serving, probe_imgs)
        emb_array[start_index:end_index, :] = emb
        if i % 5 == 4:
            utils.print_fun('{}/{}'.format(i + 1, nrof_batches))
            sys.stdout.flush()
    utils.print_fun('')
    embeddings = emb_array

    tpr, fpr, accuracy, val, val_std, far = helpers.evaluate(
        embeddings,
        actual_issame,
        nrof_folds=args.lfw_nrof_folds,
        distance_metric=args.distance_metric,
        subtract_mean=args.subtract_mean)

    rpt = report(tpr, fpr, accuracy, val, val_std, far)
    with open('report.html', 'w') as f:
        f.write(rpt)
    update_data({'#documents.report.html': rpt}, use_mlboard, mlboard)
Example #27
import json

from mlboardclient.api import client

m = client.Client('http://localhost:8082/api/v2')
app = m.apps.get('11-tfexample')
task = app.task('model')

task.resource('worker')['args'] = {'common': 'yes'}


def args(num):
    for i in range(num):

        def f(t):
            t.resource('worker')['args']['arg'] = i

        yield f


logs = task.parallel_run(3, args(7))
print(json.dumps(list(logs), indent=2))
Example #28
def main(args):
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print('Do not use mlboard.')
        else:
            print('Use mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp,
                                            args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(
            cls.image_paths
        ) > 0, 'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print('Loading feature extraction model')
    xml_file = args.model
    bin_file = xml_file[:-3] + 'bin'

    net = ie.IENetwork.from_ir(xml_file, bin_file)
    extensions = os.environ.get('INTEL_EXTENSIONS_PATH')
    plugin = ie.IEPlugin(device=args.device)

    if extensions and "CPU" in args.device:
        for ext in extensions.split(':'):
            print("LOAD extension from {}".format(ext))
            plugin.add_cpu_extension(ext)

    input_name = list(net.inputs.keys())[0]
    output_name = net.outputs[0]
    exec_net = plugin.load(net)

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images /
                                           args.batch_size))
    emb_array = np.zeros((nrof_images, 512))
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        for j in range(end_index - start_index):
            print('Batch {} <-> {}'.format(paths_batch[j],
                                           labels[start_index + j]))
        images = facenet.load_data(paths_batch, False, False, args.image_size)
        images = images.transpose([0, 3, 1, 2])
        feed_dict = {input_name: images}
        output = exec_net.infer(feed_dict)
        output = output[output_name]
        emb_array[start_index:end_index, :] = output

    classifier_filename_exp = os.path.expanduser(args.classifier_filename)

    if args.mode == 'TRAIN':
        # Train classifier
        print('Training classifier')
        model = svm.SVC(kernel='linear', probability=True)
        model.fit(emb_array, labels)

        # Create a list of class names
        class_names = [cls.name.replace('_', ' ') for cls in dataset]
        print('Classes:')
        print(class_names)

        # Saving classifier model
        with open(classifier_filename_exp, 'wb') as outfile:
            pickle.dump((model, class_names), outfile, protocol=2)
        print('Saved classifier model to file "%s"' % classifier_filename_exp)

    elif args.mode == 'CLASSIFY':
        # Classify images
        print('Testing classifier')
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)

        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]

        for i in range(len(best_class_indices)):
            print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]],
                                     best_class_probabilities[i]))

        accuracy = np.mean(np.equal(best_class_indices, labels))
        update_data({'accuracy': accuracy}, use_mlboard, mlboard)
        print('Accuracy: %.3f' % accuracy)

        if args.upload_model and accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')

            upload_model(use_mlboard, mlboard, classifier_filename_exp,
                         'facenet-classifier', '1.0.0-%s' % timestamp)
Example #29
def main():
    targs = build_config()
    parser = ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.set_defaults(worker=False)
    group.set_defaults(evaluator=False)
    group.add_argument('--worker',
                       dest='worker',
                       action='store_true',
                       help='Training')
    group.add_argument('--evaluator',
                       dest='evaluator',
                       action='store_true',
                       help='Continuously evaluate model')
    parser.add_argument('--training_dir')
    parser.add_argument('--research_dir')
    parser.add_argument('--build_id')
    parser.add_argument('--only_train', default='False')
    parser.add_argument('--export', type=str_bool, help='Export model')
    parser.add_argument('--model_name')
    parser.add_argument('--model_version')
    args, _ = parser.parse_known_args()

    with open('faster_rcnn.config', 'r') as cf:
        data = cf.read()
        config_html = '<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{}</pre></body></html>'.format(
            data)

    client.Client().update_task_info({'#documents.config.html': config_html})

    sys.path.append(args.research_dir)
    num_steps = targs['num_steps']
    model_dir = '{}/{}'.format(args.training_dir, args.build_id)
    config = tf.estimator.RunConfig(model_dir=model_dir)
    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(None),
        pipeline_config_path='faster_rcnn.config',
        train_steps=num_steps,
        sample_1_of_n_eval_examples=1,
        sample_1_of_n_eval_on_train_examples=(5))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    train_steps = train_and_eval_dict['train_steps']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    if args.evaluator:
        tf.logging.info('Starting Evaluation.')
        model_name = None
        model_version = None
        if args.export:
            model_name = args.model_name
            model_version = args.model_version
        continuous_eval(estimator, model_dir, eval_input_fns[0],
                        'validation_data', args, model_name, model_version)
    elif os.environ.get("TF_CONFIG", '') != '':
        tf.logging.info('Starting Distributed.')
        eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
        predict_input_fn = train_and_eval_dict['predict_input_fn']
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    else:
        tf.logging.info('Starting Training.')
        estimator.train(input_fn=train_input_fn, max_steps=train_steps)
Example #30
import datetime
import glob
import json
import logging
import os
import shutil
import sys
import time

LOG = logging.getLogger(__name__)
try:
    from mlboardclient.api import client

    ml_board_client = client.Client()
except:
    ml_board_client = None

meta_suff = '.__meta.json'

video_dir = os.environ['DATA_DIR']


def start_processing(config_file, job_id):
    if ml_board_client is None:
        logging.error('Unable to prepare file: ml_board client is not defined')
        return
    LOG.info('Run task %s: %s', job_id, config_file)
    app = ml_board_client.apps.get()
    task = app.task('process')
    with open(config_file, 'r') as f:
        data = json.load(f)