Exemplo n.º 1
0
def get_model(
    *,
    model_dir: str,
    arch: str = "resnet50",
    precision: str = "fp32",
    use_xla: bool = True,
    use_tf_amp: bool = False,
    use_dali: bool = False,
    gpu_memory_fraction: float = 0.7,
):
    """Build a ``Runner`` and return its estimator in inference mode.

    All arguments are keyword-only.

    Args:
        model_dir: Forwarded to ``Runner`` as ``model_dir``.
        arch: Forwarded to ``Runner`` as ``architecture``.
        precision: ``"fp16"`` or ``"fp32"`` (case-insensitive); selects the
            TensorFlow dtype handed to ``Runner``.
        use_xla: Forwarded to ``Runner`` and to the estimator factory.
        use_tf_amp: Forwarded to ``Runner``.
        use_dali: Forwarded to ``Runner`` and to the estimator factory.
        gpu_memory_fraction: Forwarded to ``Runner`` and to the estimator
            factory.

    Returns:
        Whatever ``Runner._get_estimator(mode="inference", ...)`` returns.

    Raises:
        ValueError: If ``precision`` is neither ``fp16`` nor ``fp32``.
    """
    # Validate the cheap, pure-Python argument first so that a bad value fails
    # fast, before the project imports and Horovod initialisation below.
    precision_key = precision.lower()
    if precision_key not in ("fp16", "fp32"):
        raise ValueError(
            f"Unknown precision {precision}. Allowed values: fp16|fp32")

    # Project-local imports are kept function-scoped, as in the original.
    from runtime import Runner
    from utils import hvd_wrapper as hvd

    hvd.init()

    dtype = {"fp16": tf.float16, "fp32": tf.float32}[precision_key]

    # The two f-string fragments are concatenated verbatim, so the first one
    # must end with a separator; otherwise the log reads "xla=Truetf_amp=...".
    LOGGER.info(
        f"Creating model arch={arch} precision={precision} xla={use_xla} "
        f"tf_amp={use_tf_amp}, dali={use_dali}, gpu_memory_frac={gpu_memory_fraction}"
    )

    runner = Runner(
        n_classes=NCLASSES,
        architecture=arch,
        input_format=INPUT_FORMAT,
        compute_format=COMPUTE_FORMAT,
        dtype=dtype,
        n_channels=NCHANNELS,
        height=HEIGHT,
        width=WIDTH,
        use_xla=use_xla,
        use_tf_amp=use_tf_amp,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
        gpu_id=0,
        model_dir=model_dir,
    )

    # removed params not used in inference
    estimator_params = {
        "use_final_conv": False
    }  # TODO: Why not moved to model constructor?
    estimator = runner._get_estimator(
        mode="inference",
        run_params=estimator_params,
        use_xla=use_xla,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
    )
    return estimator
Exemplo n.º 2
0
        seed=FLAGS.seed,
    )

    # ===================================

    runner = Runner(
        # ========= Model HParams ========= #
        n_classes=RUNNING_CONFIG.n_classes,
        input_format=RUNNING_CONFIG.input_format,
        compute_format=RUNNING_CONFIG.compute_format,
        dtype=RUNNING_CONFIG.dtype,
        n_channels=RUNNING_CONFIG.n_channels,
        height=RUNNING_CONFIG.height,
        width=RUNNING_CONFIG.width,
        distort_colors=RUNNING_CONFIG.distort_colors,
        log_dir=RUNNING_CONFIG.log_dir,
        model_dir=RUNNING_CONFIG.model_dir,
        data_dir=RUNNING_CONFIG.data_dir,
        data_idx_dir=RUNNING_CONFIG.data_idx_dir,

        # ======= Optimization HParams ======== #
        use_xla=RUNNING_CONFIG.use_xla,
        use_tf_amp=RUNNING_CONFIG.use_tf_amp,
        use_dali=RUNNING_CONFIG.use_dali,
        gpu_memory_fraction=RUNNING_CONFIG.gpu_memory_fraction,
        seed=RUNNING_CONFIG.seed
    )

    if RUNNING_CONFIG.mode in ["train", "train_and_evaluate", "training_benchmark"]:

        runner.train(
            iter_unit=RUNNING_CONFIG.iter_unit,
Exemplo n.º 3
0
    else:
        dllogger.init(backends=[])
    dllogger.log(data=vars(FLAGS), step='PARAMETER')

    runner = Runner(
        # ========= Model HParams ========= #
        n_classes=1001,
        architecture=FLAGS.arch,
        input_format='NHWC',
        compute_format=FLAGS.data_format,
        dtype=tf.float32,
        n_channels=3,
        height=224 if FLAGS.data_dir else FLAGS.synthetic_data_size,
        width=224 if FLAGS.data_dir else FLAGS.synthetic_data_size,
        distort_colors=False,
        log_dir=FLAGS.results_dir,
        model_dir=FLAGS.model_dir
        if FLAGS.model_dir is not None else FLAGS.results_dir,
        data_dir=FLAGS.data_dir,
        data_idx_dir=FLAGS.data_idx_dir,
        weight_init=FLAGS.weight_init,
        use_xla=FLAGS.xla,
        use_tf_amp=FLAGS.amp,
        use_dali=FLAGS.dali,
        use_cpu=FLAGS.cpu,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction,
        gpu_id=FLAGS.gpu_id,
        seed=FLAGS.seed)

    if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]:
        runner.train(iter_unit=FLAGS.iter_unit,
                     num_iter=FLAGS.num_iter,
Exemplo n.º 4
0
_NUM_CLASSES = 3 

if __name__ == "__main__":

    tf.logging.set_verbosity(tf.logging.ERROR)

    FLAGS = parse_cmdline()
    
    runner = Runner(
        # ========= Model HParams ========= #
        n_classes=_NUM_CLASSES,
        
        log_dir=FLAGS.results_dir,
        model_dir=FLAGS.results_dir,
        data_dir=FLAGS.data_dir,
        pre_trained_model_path=FLAGS.pre_trained_model_path,
        use_transpose_conv=FLAGS.use_transpose_conv,
        
        # ======= Optimization HParams ======== #
        use_xla=FLAGS.use_xla,
        use_tf_amp=FLAGS.use_tf_amp,
        
        seed=FLAGS.seed
    )
    
    if FLAGS.mode == "train_and_evaluate" and FLAGS.eval_every > 0:

        for i in range(int(FLAGS.num_iter / FLAGS.eval_every)):
            
            runner.train(
                iter_unit=FLAGS.iter_unit,
                num_iter=FLAGS.eval_every,
Exemplo n.º 5
0
    runner = Runner(
        input_format=RUNNING_CONFIG.input_format,
        compute_format=RUNNING_CONFIG.compute_format,
        n_channels=RUNNING_CONFIG.n_channels,
        model_variant=RUNNING_CONFIG.unet_variant,
        activation_fn=RUNNING_CONFIG.activation_fn,
        input_shape=RUNNING_CONFIG.input_shape,
        mask_shape=RUNNING_CONFIG.mask_shape,
        input_normalization_method=RUNNING_CONFIG.input_normalization_method,

        # Training HParams
        augment_data=RUNNING_CONFIG.augment_data,
        loss_fn_name=RUNNING_CONFIG.loss_fn_name,
        weight_init_method=RUNNING_CONFIG.weight_init_method,

        #  Runtime HParams
        use_tf_amp=RUNNING_CONFIG.use_tf_amp,
        use_xla=RUNNING_CONFIG.use_xla,

        # Directory Params
        log_dir=RUNNING_CONFIG.log_dir,
        model_dir=RUNNING_CONFIG.model_dir,
        sample_dir=RUNNING_CONFIG.sample_dir,
        data_dir=RUNNING_CONFIG.data_dir,
        dataset_name=RUNNING_CONFIG.dataset_name,
        dataset_hparams=RUNNING_CONFIG.dataset_hparams,

        # Debug Params
        debug_verbosity=RUNNING_CONFIG.debug_verbosity,
        log_every_n_steps=RUNNING_CONFIG.log_every_n_steps,
        seed=RUNNING_CONFIG.seed)
Exemplo n.º 6
0
    set_flags(FLAGS)

    backends = []
    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
        # Prepare Model Dir
        log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)
        os.makedirs(FLAGS.model_dir, exist_ok=True)
        # Setup dlLogger
        backends+=[
            JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]
    DLLogger.init(backends=backends)
    DLLogger.log(data=vars(FLAGS), step='PARAMETER')

    runner = Runner(FLAGS, DLLogger)

    if FLAGS.mode in ["train", "train_and_eval", "training_benchmark"]:
        runner.train()
        
    if FLAGS.mode in ['eval', 'evaluate', 'inference_benchmark']:
        if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError("Only single GPU inference is implemented.")
        elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
            runner.evaluate()
            
    if FLAGS.mode == 'predict':
        if FLAGS.to_predict is None:
            raise ValueError("No data to predict on.")

        if not os.path.isdir(FLAGS.to_predict):
Exemplo n.º 7
0
    set_flags(FLAGS)

    backends = []
    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
        # Prepare Model Dir
        log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)
        os.makedirs(FLAGS.model_dir, exist_ok=True)
        # Setup dlLogger
        backends += [
            JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]
    DLLogger.init(backends=backends)
    DLLogger.log(data=vars(FLAGS), step='PARAMETER')

    runner = Runner(FLAGS, DLLogger)

    if FLAGS.mode in ["train", "train_and_eval", "training_benchmark"]:
        runner.train()

    if FLAGS.mode in ['eval', 'evaluate', 'inference_benchmark']:
        if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError(
                "Only single GPU inference is implemented.")
        elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
            runner.evaluate()

    if FLAGS.mode == 'predict':
        if FLAGS.to_predict is None:
            raise ValueError("No data to predict on.")
Exemplo n.º 8
0
    else:
        dllogger.init(backends=[])
    dllogger.log(data=vars(FLAGS), step='PARAMETER')

    runner = Runner(
        # ========= Model HParams ========= #
        n_classes=1001,
        architecture=FLAGS.arch,
        input_format='NHWC',
        compute_format=FLAGS.data_format,
        dtype=tf.float32 if FLAGS.precision == 'fp32' else tf.float16,
        n_channels=3,
        height=224,
        width=224,
        distort_colors=False,
        log_dir=FLAGS.results_dir,
        model_dir=FLAGS.model_dir
        if FLAGS.model_dir is not None else FLAGS.results_dir,
        data_dir=FLAGS.data_dir,
        data_idx_dir=FLAGS.data_idx_dir,
        weight_init=FLAGS.weight_init,
        use_xla=FLAGS.use_xla,
        use_tf_amp=FLAGS.use_tf_amp,
        use_dali=FLAGS.use_dali,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction,
        gpu_id=FLAGS.gpu_id,
        seed=FLAGS.seed)

    if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]:
        runner.train(
            iter_unit=FLAGS.iter_unit,
Exemplo n.º 9
0
    backends = []
    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
        # Prepare Model Dir
        os.makedirs(config.model_dir, exist_ok=True)

        # Setup dlLogger
        backends += [
            JSONStreamBackend(verbosity=Verbosity.VERBOSE,
                              filename=config.log_filename),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]
    DLLogger.init(backends=backends)
    DLLogger.log(data=vars(config), step='PARAMETER')

    #========== initialize the runner
    runner = Runner(config, DLLogger)

    #========== determine the operation mode of the runner (tr,eval,predict)
    if config.mode in ["train", "train_and_eval", "training_benchmark"]:
        runner.train()
    if config.mode in ['eval', 'evaluate', 'inference_benchmark']:
        if config.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError(
                "Only single GPU inference is implemented.")
        elif hvd_utils.is_using_hvd():
            raise NotImplementedError(
                "Only single GPU evaluation is implemented.")
        else:
            runner.evaluate()
    if config.mode == 'predict':
        if config.predict_img_dir is None: