Exemplo n.º 1
0
def run(experiment_id, restore_path, config_file, bit, unquant_layers):
    """Resolve the configuration and checkpoint, then profile with batch size 1.

    Args:
        experiment_id: Experiment to initialise the environment with. When it
            is falsy, the environment is initialised as ``"profile"`` and the
            configuration comes from ``config_file`` alone.
        restore_path: Checkpoint path prefix. Only when ``experiment_id`` is
            given: a ``None`` value is replaced by the file found through
            ``executor.search_restore_filename`` and the ``.index`` file is
            required to exist. Otherwise it is forwarded unchanged.
        config_file: Configuration file. With an ``experiment_id`` it is
            merged over the experiment's own configuration.
        bit: Forwarded to ``_profile`` untouched.
        unquant_layers: Forwarded to ``_profile`` untouched.

    Raises:
        Exception: If both ``config_file`` and ``experiment_id`` are ``None``,
            or if the checkpoint ``.index`` file is missing.
    """
    if experiment_id is None and config_file is None:
        raise Exception("config_file or experiment_id are required")

    if not experiment_id:
        # Stand-alone mode: everything comes from the config file.
        experiment_id = "profile"
        environment.init(experiment_id)
        config = config_util.load(config_file)
    else:
        environment.init(experiment_id)
        config = config_util.load_from_experiment()
        if config_file:
            overrides = config_util.load(config_file)
            config = config_util.merge(config, overrides)

        if restore_path is None:
            found_file = executor.search_restore_filename(environment.CHECKPOINTS_DIR)
            restore_path = os.path.join(environment.CHECKPOINTS_DIR, found_file)

        index_file = "{}.index".format(restore_path)
        if not os.path.exists(index_file):
            raise Exception("restore file {} dont exists.".format(restore_path))

    # Every batch-size setting is forced to a single sample.
    config.BATCH_SIZE = 1
    config.NETWORK.BATCH_SIZE = 1
    config.DATASET.BATCH_SIZE = 1

    executor.init_logging(config)
    config_util.display(config)

    _profile(config, restore_path, bit, unquant_layers)
Exemplo n.º 2
0
def run(input_dir, output_dir, experiment_id, config_file, restore_path,
        save_images):
    """Validate the inputs of a prediction run and delegate to ``_run``.

    Args:
        input_dir: Directory that must already exist; forwarded to ``_run``.
        output_dir: Forwarded to ``_run`` untouched.
        experiment_id: Experiment used to initialise the environment.
        config_file: Optional configuration file merged over the experiment's
            own configuration.
        restore_path: Checkpoint path prefix. When ``None`` it is built from
            ``environment.CHECKPOINTS_DIR`` and the result of
            ``search_restore_filename``.
        save_images: Forwarded to ``_run`` untouched.

    Raises:
        FileNotFoundError: If ``input_dir`` is not a directory, or if
            ``<restore_path>.index`` does not exist.
    """
    environment.init(experiment_id)

    config = config_util.load_from_experiment()
    if config_file:
        overrides = config_util.load(config_file)
        config = config_util.merge(config, overrides)

    input_dir_exists = os.path.isdir(input_dir)
    if not input_dir_exists:
        raise FileNotFoundError(
            "Input directory not found: '{}'".format(input_dir))

    if restore_path is None:
        found_file = search_restore_filename(environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, found_file)

    print("Restore from {}".format(restore_path))

    index_file = "{}.index".format(restore_path)
    if not os.path.exists(index_file):
        raise FileNotFoundError(
            "Checkpoint file not found: '{}'".format(restore_path))

    print("---- start predict ----")
    _run(input_dir, output_dir, config, restore_path, save_images)
    print("---- end predict ----")
Exemplo n.º 3
0
def evaluate(config, restore_path, output_dir):
    """Evaluate a restored checkpoint on one pass of a dataset subset and save the metrics.

    The ``"test"`` subset is used when the dataset class lists it in
    ``available_subsets``; otherwise ``"validation"`` is used. Metrics are
    accumulated over ``ceil(num_per_epoch / BATCH_SIZE)`` steps, written as
    summaries under ``environment.TENSORBOARD_DIR + "/evaluate"`` and passed
    as a JSON string to ``save_json``.

    The session, the summary writer and the dataset are released in a
    ``finally`` block, so they are closed even when evaluation fails.

    Args:
        config: Experiment configuration. This function reads
            ``DATASET_CLASS``, ``NETWORK_CLASS``, ``NETWORK``, ``IS_DEBUG``,
            ``BATCH_SIZE``, ``DATASET`` and ``TASK`` from it.
        restore_path: Checkpoint path prefix; ``<restore_path>.index`` must
            exist. When ``None`` it is built from
            ``environment.CHECKPOINTS_DIR`` and the result of
            ``executor.search_restore_filename``.
        output_dir: Directory handed to ``save_json``. When ``None`` it
            defaults to ``evaluate`` under the grandparent directory of
            ``restore_path``.

    Raises:
        Exception: If ``<restore_path>.index`` does not exist.
    """
    if restore_path is None:
        restore_file = executor.search_restore_filename(environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, restore_file)

    if not file_io.exists("{}.index".format(restore_path)):
        raise Exception("restore file {} dont exists.".format(restore_path))

    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(os.path.dirname(restore_path)), "evaluate")

    logger.info(f"restore_path:{restore_path}")

    DatasetClass = config.DATASET_CLASS
    ModelClass = config.NETWORK_CLASS
    network_kwargs = {key.lower(): val for key, val in config.NETWORK.items()}

    subset = "test" if "test" in DatasetClass.available_subsets else "validation"

    validation_dataset = setup_dataset(config, subset, seed=0)

    # Track the resources created below so the `finally` clause can release
    # exactly those that were actually opened.
    sess = None
    validation_writer = None
    try:
        graph = tf.Graph()
        with graph.as_default():
            # Object detection networks take one extra constructor argument.
            extra_kwargs = {}
            if ModelClass.__module__.startswith("blueoil.networks.object_detection"):
                extra_kwargs["num_max_boxes"] = validation_dataset.num_max_boxes

            model = ModelClass(
                classes=validation_dataset.classes,
                is_debug=config.IS_DEBUG,
                **extra_kwargs,
                **network_kwargs,
            )

            is_training = tf.constant(False, name="is_training")

            images_placeholder, labels_placeholder = model.placeholders()

            output = model.inference(images_placeholder, is_training)

            metrics_ops_dict, metrics_update_op = model.metrics(output, labels_placeholder)
            model.summary(output, labels_placeholder)

            summary_op = tf.compat.v1.summary.merge_all()
            metrics_summary_op = executor.metrics_summary_op(metrics_ops_dict)

            init_op = tf.compat.v1.global_variables_initializer()
            reset_metrics_op = tf.compat.v1.local_variables_initializer()
            saver = tf.compat.v1.train.Saver(max_to_keep=None)

        session_config = None  # tf.ConfigProto(log_device_placement=True)
        sess = tf.compat.v1.Session(graph=graph, config=session_config)
        sess.run([init_op, reset_metrics_op])

        validation_writer = tf.compat.v1.summary.FileWriter(environment.TENSORBOARD_DIR + "/evaluate")

        saver.restore(sess, restore_path)

        last_step = sess.run(model.global_step)

        # Number of batches needed to cover the subset once.
        test_step_size = int(math.ceil(validation_dataset.num_per_epoch / config.BATCH_SIZE))
        logger.info(f"test_step_size{test_step_size}")

        for test_step in range(test_step_size):
            logger.info(f"test_step{test_step}")

            images, labels = validation_dataset.feed()
            feed_dict = {
                images_placeholder: images,
                labels_placeholder: labels,
            }

            # Summarize at only last step.
            if test_step == test_step_size - 1:
                summary, _ = sess.run([summary_op, metrics_update_op], feed_dict=feed_dict)
                validation_writer.add_summary(summary, last_step)
            else:
                sess.run([metrics_update_op], feed_dict=feed_dict)

        metrics_summary = sess.run(metrics_summary_op)
        validation_writer.add_summary(metrics_summary, last_step)

        is_tfds = "TFDS_KWARGS" in config.DATASET
        dataset_name = config.DATASET.TFDS_KWARGS["name"] if is_tfds else config.DATASET_CLASS.__name__
        dataset_path = config.DATASET.TFDS_KWARGS["data_dir"] if is_tfds else ""

        metrics_dict = {
            'task_type': config.TASK.value,
            'network_name': config.NETWORK_CLASS.__name__,
            'dataset_name': dataset_name,
            'dataset_path': dataset_path,
            'last_step': int(last_step),
            'metrics': {k: float(sess.run(op)) for k, op in metrics_ops_dict.items()},
        }
        save_json(output_dir, json.dumps(metrics_dict, indent=4,), metrics_dict["last_step"])
    finally:
        # Closing the writer also flushes pending summary events to disk.
        if validation_writer is not None:
            validation_writer.close()
        if sess is not None:
            sess.close()
        validation_dataset.close()
Exemplo n.º 4
0
def _export(config, restore_path, image_path):
    """Restore a checkpoint and export it as pb and yaml files.

    Output goes to
    ``<EXPERIMENT_DIR>/export/<checkpoint basename>/<IMAGE_SIZE[0]>x<IMAGE_SIZE[1]>``.
    When ``image_path`` is given, the outputs of the operations returned by
    ``_minimal_operations`` are evaluated for that image and handed to
    ``_save_all_operation_outputs`` together with an ``inference_test_data``
    sub-directory.

    The session is closed in a ``finally`` block once the pb file has been
    written, including when an error occurs.

    Args:
        config: Experiment configuration. This function reads
            ``NETWORK_CLASS``, ``NETWORK``, ``CLASSES``, ``IS_DEBUG``,
            ``IMAGE_SIZE``, ``PRE_PROCESSOR`` and ``DATA_FORMAT`` from it.
        restore_path: Checkpoint path prefix; ``<restore_path>.index`` must
            exist. When ``None`` it is built from
            ``environment.CHECKPOINTS_DIR`` and the result of
            ``executor.search_restore_filename``.
        image_path: Optional path of an image used to dump intermediate
            inference values; a falsy value skips that step.

    Returns:
        The directory the pb and yaml files were written to.

    Raises:
        Exception: If ``<restore_path>.index`` does not exist.
    """
    if restore_path is None:
        restore_file = executor.search_restore_filename(
            environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, restore_file)

    print("Restore from {}".format(restore_path))

    if not os.path.exists("{}.index".format(restore_path)):
        raise Exception("restore file {} dont exists.".format(restore_path))

    output_root_dir = os.path.join(environment.EXPERIMENT_DIR, "export",
                                   os.path.basename(restore_path))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_root_dir, exist_ok=True)

    graph = tf.Graph()
    ModelClass = config.NETWORK_CLASS
    network_kwargs = {key.lower(): val for key, val in config.NETWORK.items()}

    with graph.as_default():

        model = ModelClass(
            classes=config.CLASSES,
            is_debug=config.IS_DEBUG,
            **network_kwargs,
        )

        is_training = tf.constant(False, name="is_training")

        images_placeholder, _ = model.placeholders()
        model.inference(images_placeholder, is_training)
        init_op = tf.compat.v1.global_variables_initializer()

        saver = tf.compat.v1.train.Saver(max_to_keep=50)

    session_config = tf.compat.v1.ConfigProto()
    sess = tf.compat.v1.Session(graph=graph, config=session_config)
    all_outputs = []
    try:
        sess.run(init_op)

        saver.restore(sess, restore_path)

        main_output_dir = os.path.join(
            output_root_dir, "{}x{}".format(config.IMAGE_SIZE[0],
                                            config.IMAGE_SIZE[1]))
        os.makedirs(main_output_dir, exist_ok=True)

        # save inference values as npy files for runtime inference test and debug.
        if image_path:
            all_ops = _minimal_operations(sess)
            inference_values_output_dir = os.path.join(main_output_dir,
                                                       "inference_test_data")
            os.makedirs(inference_values_output_dir, exist_ok=True)

            raw_image = load_image(image_path)
            image = _pre_process(raw_image, config.PRE_PROCESSOR,
                                 config.DATA_FORMAT)
            images = np.expand_dims(image, axis=0)
            feed_dict = {
                images_placeholder: images,
            }

            # HACK: This is for TensorFlow bug workaround.
            # Without a GPU, outputs 1..5 of FusedBatchNormV3 ops are skipped.
            # We can remove this once it's been resolved in TensorFlow
            # Issue link: https://github.com/tensorflow/tensorflow/issues/36456
            # The device list is loop-invariant, so it is queried only once.
            skip_extra_bn_outputs = not tf.config.experimental.list_physical_devices('GPU')

            for op in all_ops:
                for op_output in op.outputs:
                    if (skip_extra_bn_outputs
                            and "FusedBatchNormV3" in op_output.name
                            and 1 <= int(op_output.name.split(":")[1]) <= 5):
                        continue
                    val = sess.run(op_output.name, feed_dict=feed_dict)
                    # The numeric prefix is the running count of saved outputs.
                    name = "{:03d}_{}".format(
                        len(all_outputs), op_output.name.replace('/', '_'))
                    all_outputs.append({'val': val, 'name': name})

            _save_all_operation_outputs(image_path, inference_values_output_dir,
                                        image, raw_image, all_outputs,
                                        config.IMAGE_SIZE)

        yaml_names = config_util.save_yaml(main_output_dir, config)
        pb_name = executor.save_pb_file(sess, main_output_dir)
    finally:
        sess.close()

    # NOTE: the template expects exactly two entries in yaml_names.
    message = """
Create pb and yaml files in: {}
pb: {}
yaml: {}, {}
""".format(main_output_dir, pb_name, *yaml_names)

    if image_path:
        message += "Create npy files in under `inference_test_data` folder \n"
        message += "npy: {}".format([d["name"] for d in all_outputs] + [
            "raw_image",
            "preprocessed_image",
        ])

    print(message)
    print("finish")

    return main_output_dir
Exemplo n.º 5
0
def _run(config_file, experiment_id, restore_path, image_size, step_size, cpu):
    """Measure inference latency with batch size 1 and print a report.

    Args:
        config_file: Configuration file. With an ``experiment_id`` it is
            merged over the experiment's own configuration; without one it is
            the only configuration source.
        experiment_id: Experiment to initialise the environment with. When it
            is falsy, the environment is initialised as ``"measure_latency"``.
        restore_path: Checkpoint path prefix. Only when ``experiment_id`` is
            given: a ``None`` value is replaced by the file found through
            ``executor.search_restore_filename`` and the ``.index`` file is
            required to exist. Otherwise it is forwarded unchanged.
        image_size: Two-element iterable. Unless it equals ``[None, None]``
            it overrides the image size of the configuration, the network
            and the pre/post processors.
        step_size: Forwarded to ``_measure_time`` and reported as the number
            of samples.
        cpu: Only used for the report line ``use gpu by network``, which
            prints ``not cpu``.

    Raises:
        Exception: If neither ``experiment_id`` nor ``config_file`` is given,
            or if the checkpoint ``.index`` file is missing.
    """
    # Fail early with a clear message instead of letting
    # config_util.load(None) raise further down.
    if not experiment_id and config_file is None:
        raise Exception("config_file or experiment_id are required")

    if experiment_id:
        environment.init(experiment_id)
        config = config_util.load_from_experiment()
        if config_file:
            config = config_util.merge(config, config_util.load(config_file))

        if restore_path is None:
            restore_file = executor.search_restore_filename(
                environment.CHECKPOINTS_DIR)
            restore_path = os.path.join(environment.CHECKPOINTS_DIR,
                                        restore_file)

        if not os.path.exists("{}.index".format(restore_path)):
            raise Exception(
                "restore file {} dont exists.".format(restore_path))

    else:
        experiment_id = "measure_latency"
        environment.init(experiment_id)
        config = config_util.load(config_file)

    # Every batch-size setting is forced to a single sample.
    config.BATCH_SIZE = 1
    config.NETWORK.BATCH_SIZE = 1
    config.DATASET.BATCH_SIZE = 1

    if list(image_size) != [None, None]:
        config.IMAGE_SIZE = list(image_size)
        config.NETWORK.IMAGE_SIZE = list(image_size)

        # override pre processes image size.
        if config.PRE_PROCESSOR:
            config.PRE_PROCESSOR.set_image_size(image_size)

        # override post processes image size.
        if config.POST_PROCESSOR:
            config.POST_PROCESSOR.set_image_size(image_size)

        print("Override IMAGE_SIZE", config.IMAGE_SIZE)

    executor.init_logging(config)
    config_util.display(config)

    overall_times, only_network_times = _measure_time(config, restore_path,
                                                      step_size)

    overall_times = np.array(overall_times)
    only_network_times = np.array(only_network_times)
    # Per-sample FPS, computed once instead of once per statistic.
    overall_fps = 1 / overall_times
    only_network_fps = 1 / only_network_times

    # list of physical_device_desc
    devices = [
        device.physical_device_desc
        for device in device_lib.list_local_devices()
        if device.physical_device_desc
    ]

    def _stats(values, scale=1):
        """Return [mean, std, min, max] of ``values``, each times ``scale``."""
        return [
            reducer(values) * scale
            for reducer in (np.mean, np.std, np.min, np.max)
        ]

    # The report multiplies the measured times by 1000 and labels them msec.
    message = """
---- measure latency result ----
total number of execution (number of samples): {}
network: {}
use gpu by network: {}
image size: {}
devices: {}

* overall (include pre-post-process which execute on cpu)
total time: {:.4f} msec
latency
   mean (SD=standard deviation): {:.4f} (SD={:.4f}) msec, min: {:.4f} msec, max: {:.4f} msec
FPS
   mean (SD=standard deviation): {:.4f} (SD={:.4f}), min: {:.4f}, max: {:.4f}

* network only (exclude pre-post-process):
total time: {:.4f} msec
latency
   mean (SD=standard deviation): {:.4f} (SD={:.4f}) msec, min: {:.4f} msec, max: {:.4f} msec
FPS
   mean (SD=standard deviation): {:.4f} (SD={:.4f}), min: {:.4f}, max: {:.4f}
---- measure latency result ----
""".format(
        step_size,
        config.NETWORK_CLASS.__name__,
        not cpu,
        config.IMAGE_SIZE,
        devices,
        # overall
        np.sum(overall_times) * 1000,
        *_stats(overall_times, 1000),
        *_stats(overall_fps),
        # network only
        np.sum(only_network_times) * 1000,
        *_stats(only_network_times, 1000),
        *_stats(only_network_fps),
    )

    print(message)