Example no. 1
        gpu_memory_fraction=FLAGS.gpu_memory_fraction,
        gpu_id=FLAGS.gpu_id,
        seed=FLAGS.seed)

    if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]:
        runner.train(iter_unit=FLAGS.iter_unit,
                     num_iter=FLAGS.num_iter,
                     run_iter=FLAGS.run_iter,
                     batch_size=FLAGS.batch_size,
                     warmup_steps=FLAGS.warmup_steps,
                     log_every_n_steps=FLAGS.display_every,
                     weight_decay=FLAGS.weight_decay,
                     lr_init=FLAGS.lr_init,
                     lr_warmup_epochs=FLAGS.lr_warmup_epochs,
                     momentum=FLAGS.momentum,
                     loss_scale=FLAGS.static_loss_scale,
                     label_smoothing=FLAGS.label_smoothing,
                     mixup=FLAGS.mixup,
                     use_static_loss_scaling=(FLAGS.static_loss_scale != -1),
                     use_cosine_lr=FLAGS.cosine_lr,
                     is_benchmark=FLAGS.mode == 'training_benchmark',
                     use_final_conv=FLAGS.use_final_conv,
                     quantize=FLAGS.quantize,
                     symmetric=FLAGS.symmetric,
                     quant_delay=FLAGS.quant_delay,
                     use_qdq=FLAGS.use_qdq,
                     finetune_checkpoint=FLAGS.finetune_checkpoint)

    if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:

        if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError(
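Several of these fragments gate multi-GPU behaviour on hvd_utils.is_using_hvd(). For reference, a minimal sketch of such a helper is given below; it assumes Horovod jobs are launched through OpenMPI (mpirun/horovodrun), so the presence of the OMPI rank/size environment variables is taken as the signal that Horovod is active. The implementation in the original repository may differ.

import os


def is_using_hvd():
    # Heuristic: workers launched via mpirun/horovodrun with OpenMPI
    # carry these environment variables.
    env_vars = ["OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE"]
    return all(var in os.environ for var in env_vars)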
Example no. 2
        use_xla=RUNNING_CONFIG.use_xla,
        use_tf_amp=RUNNING_CONFIG.use_tf_amp,
        use_dali=RUNNING_CONFIG.use_dali,
        gpu_memory_fraction=RUNNING_CONFIG.gpu_memory_fraction,
        seed=RUNNING_CONFIG.seed
    )

    if RUNNING_CONFIG.mode in ["train", "train_and_evaluate", "training_benchmark"]:

        runner.train(
            iter_unit=RUNNING_CONFIG.iter_unit,
            num_iter=RUNNING_CONFIG.num_iter,
            batch_size=RUNNING_CONFIG.batch_size,
            warmup_steps=RUNNING_CONFIG.warmup_steps,
            log_every_n_steps=RUNNING_CONFIG.log_every_n_steps,
            weight_decay=RUNNING_CONFIG.weight_decay,
            learning_rate_init=RUNNING_CONFIG.learning_rate_init,
            momentum=RUNNING_CONFIG.momentum,
            loss_scale=RUNNING_CONFIG.loss_scale,
            use_static_loss_scaling=FLAGS.use_static_loss_scaling,
            is_benchmark=RUNNING_CONFIG.mode == 'training_benchmark',
        )

    if RUNNING_CONFIG.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:

        if RUNNING_CONFIG.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError("Only single GPU inference is implemented.")

        elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:

            runner.evaluate(
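This fragment is cut off at the runner.evaluate( call. Judging from the evaluation calls visible elsewhere on this page (see Example no. 3), it typically takes a shape like the sketch below; the exact parameter list is an assumption inferred from those examples, not the original code.

            runner.evaluate(
                iter_unit=RUNNING_CONFIG.iter_unit,
                num_iter=RUNNING_CONFIG.num_iter,
                warmup_steps=RUNNING_CONFIG.warmup_steps,
                batch_size=RUNNING_CONFIG.batch_size,
                is_benchmark=RUNNING_CONFIG.mode == 'inference_benchmark',
            )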
Example no. 3
        # ======= Optimization HParams ======== #
        use_xla=FLAGS.use_xla,
        use_tf_amp=FLAGS.use_tf_amp,
        
        seed=FLAGS.seed
    )
    
    if FLAGS.mode == "train_and_evaluate" and FLAGS.eval_every > 0:

        for i in range(int(FLAGS.num_iter / FLAGS.eval_every)):
            
            runner.train(
                iter_unit=FLAGS.iter_unit,
                num_iter=FLAGS.eval_every,
                batch_size=FLAGS.batch_size,
                warmup_steps=FLAGS.warmup_steps,
                weight_decay=FLAGS.weight_decay,
                learning_rate_init=FLAGS.lr_init,
                momentum=FLAGS.momentum,
                is_benchmark=FLAGS.mode == 'training_benchmark'
            )
                
            runner.evaluate(
                iter_unit= "epoch",
                num_iter= 1,
                warmup_steps=FLAGS.warmup_steps,
                batch_size=FLAGS.batch_size,
                is_benchmark=FLAGS.mode == 'inference_benchmark'
            )
        
    else:  
Example no. 4
        # Debug Params
        debug_verbosity=RUNNING_CONFIG.debug_verbosity,
        log_every_n_steps=RUNNING_CONFIG.log_every_n_steps,
        seed=RUNNING_CONFIG.seed)

    if RUNNING_CONFIG.exec_mode in [
            "train", "train_and_evaluate", "training_benchmark"
    ]:
        runner.train(
            iter_unit=RUNNING_CONFIG.iter_unit,
            num_iter=RUNNING_CONFIG.num_iter,
            batch_size=RUNNING_CONFIG.batch_size,
            warmup_steps=RUNNING_CONFIG.warmup_steps,
            weight_decay=RUNNING_CONFIG.weight_decay,
            learning_rate=RUNNING_CONFIG.learning_rate,
            learning_rate_decay_factor=RUNNING_CONFIG.learning_rate_decay_factor,
            learning_rate_decay_steps=RUNNING_CONFIG.learning_rate_decay_steps,
            rmsprop_decay=RUNNING_CONFIG.rmsprop_decay,
            rmsprop_momentum=RUNNING_CONFIG.rmsprop_momentum,
            use_auto_loss_scaling=FLAGS.use_auto_loss_scaling,
            augment_data=RUNNING_CONFIG.augment_data,
            is_benchmark=RUNNING_CONFIG.exec_mode == 'training_benchmark')

    if RUNNING_CONFIG.exec_mode in [
            "train_and_evaluate", 'evaluate', 'inference_benchmark'
    ] and hvd.rank() == 0:
        runner.evaluate(
            iter_unit=RUNNING_CONFIG.iter_unit
            if RUNNING_CONFIG.exec_mode != "train_and_evaluate" else "epoch",
            num_iter=RUNNING_CONFIG.num_iter
Example no. 5
        use_dali=RUNNING_CONFIG.use_dali,
        gpu_memory_fraction=RUNNING_CONFIG.gpu_memory_fraction,
        seed=RUNNING_CONFIG.seed)

    if RUNNING_CONFIG.mode in [
            "train", "train_and_evaluate", "training_benchmark"
    ]:

        runner.train(
            iter_unit=RUNNING_CONFIG.iter_unit,
            num_iter=RUNNING_CONFIG.num_iter,
            batch_size=RUNNING_CONFIG.batch_size,
            warmup_steps=RUNNING_CONFIG.warmup_steps,
            log_every_n_steps=RUNNING_CONFIG.log_every_n_steps,
            weight_decay=RUNNING_CONFIG.weight_decay,
            lr_init=RUNNING_CONFIG.lr_init,
            lr_warmup_epochs=RUNNING_CONFIG.lr_warmup_epochs,
            momentum=RUNNING_CONFIG.momentum,
            loss_scale=RUNNING_CONFIG.loss_scale,
            label_smoothing=RUNNING_CONFIG.label_smoothing,
            use_static_loss_scaling=RUNNING_CONFIG.use_static_loss_scaling,
            use_cosine_lr=RUNNING_CONFIG.use_cosine_lr,
            is_benchmark=RUNNING_CONFIG.mode == 'training_benchmark',
        )

    if RUNNING_CONFIG.mode in [
            "train_and_evaluate", 'evaluate', 'inference_benchmark'
    ]:

        if RUNNING_CONFIG.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError(
Example no. 6
    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
        # Prepare Model Dir
        log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)
        os.makedirs(FLAGS.model_dir, exist_ok=True)
        # Setup dlLogger
        backends += [
            JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]
    DLLogger.init(backends=backends)
    DLLogger.log(data=vars(FLAGS), step='PARAMETER')

    runner = Runner(FLAGS, DLLogger)

    if FLAGS.mode in ["train", "train_and_eval", "training_benchmark"]:
        runner.train()
        
    if FLAGS.mode in ['eval', 'evaluate', 'inference_benchmark']:
        if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError("Only single GPU inference is implemented.")
        elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
            runner.evaluate()
            
    if FLAGS.mode == 'predict':
        if FLAGS.to_predict is None:
            raise ValueError("No data to predict on.")

        if not os.path.isdir(FLAGS.to_predict):
            raise ValueError("Provide directory with images to infer!")

        if hvd_utils.is_using_hvd():
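The backends += [...] line above presupposes that backends was created before this excerpt begins. A minimal sketch of the assumed setup is shown below: the list starts empty, so Horovod ranks other than 0 still call DLLogger.init() but attach no backends and stay silent, while rank 0 logs to both a JSON file and stdout. The imports follow NVIDIA's dllogger package; treat them as an assumption about what sits above the excerpt.

import dllogger as DLLogger
from dllogger import JSONStreamBackend, StdOutBackend, Verbosity

# Assumed initialization (not shown in the excerpt): an empty backend list
# keeps every rank except 0 from producing any log output.
backends = []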
Example no. 7
        use_tf_amp=FLAGS.use_tf_amp,
        use_dali=FLAGS.use_dali,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction,
        gpu_id=FLAGS.gpu_id,
        seed=FLAGS.seed)

    if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]:
        runner.train(
            iter_unit=FLAGS.iter_unit,
            num_iter=FLAGS.num_iter,
            run_iter=FLAGS.run_iter,
            batch_size=FLAGS.batch_size,
            warmup_steps=FLAGS.warmup_steps,
            log_every_n_steps=FLAGS.display_every,
            weight_decay=FLAGS.weight_decay,
            lr_init=FLAGS.lr_init,
            lr_warmup_epochs=FLAGS.lr_warmup_epochs,
            momentum=FLAGS.momentum,
            loss_scale=FLAGS.loss_scale,
            label_smoothing=FLAGS.label_smoothing,
            mixup=FLAGS.mixup,
            use_static_loss_scaling=FLAGS.use_static_loss_scaling,
            use_cosine_lr=FLAGS.use_cosine_lr,
            is_benchmark=FLAGS.mode == 'training_benchmark',
        )

    if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:

        if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
            raise NotImplementedError(
                "Only single GPU inference is implemented.")