Example #1
0
    def test_task(self, config_name):
        """Smoke-tests one train and one validation step of the cls task."""
        image_size = [224, 224]
        tfrecord_path = os.path.join(self.get_temp_dir(),
                                     'cls_test.tfrecord')
        serialized = tfexample_utils.create_classification_example(
            image_height=image_size[0], image_width=image_size[1])
        self._create_test_tfrecord(
            tfrecord_file=tfrecord_path,
            example=tf.train.Example.FromString(serialized),
            num_samples=10)

        config = exp_factory.get_exp_config(config_name)
        config.task.train_data.global_batch_size = 2
        config.task.train_data.input_path = tfrecord_path
        config.task.validation_data.input_path = tfrecord_path

        task = img_cls_task.ImageClassificationTask(config.task)
        model = task.build_model()
        metrics = task.build_metrics()

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), task.build_inputs,
            config.task.train_data)
        batch_iter = iter(dataset)

        factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = factory.build_optimizer(factory.build_learning_rate())

        train_logs = task.train_step(
            next(batch_iter), model, optimizer, metrics=metrics)
        for m in metrics:
            train_logs[m.name] = m.result()
        for key in ('loss', 'accuracy', 'top_5_accuracy'):
            self.assertIn(key, train_logs)

        val_logs = task.validation_step(
            next(batch_iter), model, metrics=metrics)
        for m in metrics:
            val_logs[m.name] = m.result()
        for key in ('loss', 'accuracy', 'top_5_accuracy'):
            self.assertIn(key, val_logs)
    def test_task(self, config_name):
        """Runs one train and one validation step of the segmentation task."""
        backbone_by_config = {
            'deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32':
            'mobilenet_edgetpu_v2_xs',
            'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32':
            'mobilenet_edgetpu_v2_s',
            'deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32':
            'mobilenet_edgetpu_v2_m',
        }
        config = seg_cfg.seg_deeplabv3plus_ade20k_32(
            backbone_by_config[config_name], init_backbone=False)
        # Shrink batch/buffer/output sizes so the test runs quickly on CPU.
        for data_cfg in (config.task.train_data, config.task.validation_data):
            data_cfg.global_batch_size = 1
            data_cfg.shuffle_buffer_size = 2
            data_cfg.output_size = [32, 32]
        config.task.model.decoder.aspp.pool_kernel_size = None
        config.task.model.backbone.dilated_resnet.model_id = 50
        config.task.model.backbone.dilated_resnet.output_stride = 16

        task = img_seg_task.CustomSemanticSegmentationTask(config.task)
        model = task.build_model()
        train_metrics = task.build_metrics()

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), task.build_inputs,
            config.task.train_data)
        batch_iter = iter(dataset)

        factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = factory.build_optimizer(factory.build_learning_rate())

        train_logs = task.train_step(
            next(batch_iter), model, optimizer, metrics=train_metrics)
        self.assertIn('loss', train_logs)
        val_logs = task.validation_step(
            next(batch_iter), model,
            metrics=task.build_metrics(training=False))
        self.assertIn('loss', val_logs)
Example #3
0
    def test_task(self, config_name):
        """Smoke-tests train and validation steps of the autoseg EdgeTPU task.

        Args:
          config_name: experiment name used to select the matching backbone.
        """
        config_to_backbone_mapping = {
            'autoseg_edgetpu_xs': 'autoseg_edgetpu_backbone_xs',
            # Fixed typo: previously 'autoseg_edgetpu_backone_s', which did not
            # match the '*_backbone_*' naming used by the sibling entry above.
            'autoseg_edgetpu_s': 'autoseg_edgetpu_backbone_s'
        }
        config = autoseg_cfg.autoseg_edgetpu_experiment_config(
            config_to_backbone_mapping[config_name], init_backbone=False)
        # Keep batches and shuffle buffers tiny so the test runs fast.
        config.task.train_data.global_batch_size = 2
        config.task.train_data.shuffle_buffer_size = 2
        config.task.validation_data.shuffle_buffer_size = 2
        config.task.validation_data.global_batch_size = 2
        config.task.train_data.output_size = [512, 512]
        config.task.validation_data.output_size = [512, 512]

        task = img_seg_task.AutosegEdgeTPUTask(config.task)
        model = task.build_model()
        metrics = task.build_metrics()
        strategy = tf.distribute.get_strategy()

        dataset = orbit.utils.make_distributed_dataset(strategy,
                                                       task.build_inputs,
                                                       config.task.train_data)

        iterator = iter(dataset)
        opt_factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = opt_factory.build_optimizer(
            opt_factory.build_learning_rate())
        # EMA optimizers need a shadow copy of the weights before training.
        if isinstance(optimizer, optimization.ExponentialMovingAverage
                      ) and not optimizer.has_shadow_copy:
            optimizer.shadow_copy(model)

        logs = task.train_step(next(iterator),
                               model,
                               optimizer,
                               metrics=metrics)
        self.assertIn('loss', logs)
        logs = task.validation_step(next(iterator),
                                    model,
                                    metrics=task.build_metrics(training=False))
        self.assertIn('loss', logs)
        model.summary()
Example #4
0
    def testTaskWithUnstructuredSparsity(self, config_name):
        """Verifies a pruned model still trains and validates correctly."""
        config = exp_factory.get_exp_config(config_name)
        config.task.train_data.global_batch_size = 2

        task = img_cls_task.ImageClassificationTask(config.task)
        model = task.build_model()
        metrics = task.build_metrics()

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), task.build_inputs,
            config.task.train_data)
        batch_iter = iter(dataset)

        factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = factory.build_optimizer(factory.build_learning_rate())

        # EMA optimizers must shadow the model weights before the first step.
        if (isinstance(optimizer, optimization.ExponentialMovingAverage)
                and not optimizer.has_shadow_copy):
            optimizer.shadow_copy(model)

        if config.task.pruning:
            # Auxiliary initialization required to prune a model; normally
            # performed by the train library.
            actions.PruningAction(export_dir=tempfile.gettempdir(),
                                  model=model,
                                  optimizer=optimizer)

        # Check all layers and target weights are successfully pruned.
        self._validate_model_pruned(model, config_name)

        train_logs = task.train_step(
            next(batch_iter), model, optimizer, metrics=metrics)
        self._validate_metrics(train_logs, metrics)

        val_logs = task.validation_step(
            next(batch_iter), model, metrics=metrics)
        self._validate_metrics(val_logs, metrics)
Example #5
0
  def test_retinanet_task(self, test_config, is_training):
    """RetinaNet train/eval smoke test using a tiny synthetic dataset."""
    image_size = [384, 384]
    tfrecord_path = os.path.join(self.get_temp_dir(), 'det_test.tfrecord')
    example = tfexample_utils.create_detection_test_example(
        image_height=image_size[0],
        image_width=image_size[1],
        image_channel=3,
        num_instances=10)
    self._create_test_tfrecord(
        tfrecord_file=tfrecord_path, example=example, num_samples=10)

    config = exp_factory.get_exp_config(test_config)
    # Shrink the experiment so a single step runs quickly in the test.
    config.task.model.input_size = [128, 128, 3]
    config.trainer.steps_per_loop = 1
    config.train_steps = 1
    config.task.annotation_file = None
    for data_cfg in (config.task.train_data, config.task.validation_data):
      data_cfg.global_batch_size = 1
      data_cfg.shuffle_buffer_size = 2
      data_cfg.input_path = tfrecord_path

    task = retinanet.RetinaNetTask(config.task)
    model = task.build_model()
    metrics = task.build_metrics(training=is_training)

    data_config = (config.task.train_data
                   if is_training else config.task.validation_data)
    dataset = orbit.utils.make_distributed_dataset(
        tf.distribute.get_strategy(), task.build_inputs, data_config)
    batch_iter = iter(dataset)

    factory = optimization.OptimizerFactory(config.trainer.optimizer_config)
    optimizer = factory.build_optimizer(factory.build_learning_rate())

    if is_training:
      task.train_step(next(batch_iter), model, optimizer, metrics=metrics)
    else:
      task.validation_step(next(batch_iter), model, metrics=metrics)
Example #6
0
  def build_optimizer(self, config):
    """Builds the optimizer for the fused model.

    Uses polynomial learning-rate decay with linear warmup, taking the
    schedule parameters from `config`.

    Args:
      config: object providing `decay_steps`, `initial_learning_rate`,
        `end_learning_rate` and `warmup_steps`.

    Returns:
      A configured optimizer instance.
    """
    updated_config = self.optimizer_config.replace(
        learning_rate={
            'polynomial': {
                'decay_steps': config.decay_steps,
                'initial_learning_rate': config.initial_learning_rate,
                'end_learning_rate': config.end_learning_rate,
            }
        },
        warmup={
            'type': 'linear',
            'linear': {
                'warmup_steps': config.warmup_steps,
            }
        })
    logging.info('The optimizer config is: %s', updated_config.as_dict())

    factory = optimization.OptimizerFactory(updated_config)
    return factory.build_optimizer(factory.build_learning_rate())
Example #7
0
    def get_optimizer(self, stage_id):
        """Builds an optimizer configured for one training stage.

        Args:
          stage_id: index into `self.task_config.stage_list` selecting the
            stage whose warmup and learning-rate settings are applied.

        Returns:
          An optimizer built from the stage-specific configuration.
        """
        stage = self.task_config.stage_list[stage_id]
        stage_params = self._optimizer_config.replace(
            warmup={
                'linear': {
                    'warmup_steps': stage.warmup_steps
                },
            },
            learning_rate={
                'power': {
                    'initial_learning_rate': stage.initial_learning_rate
                },
            },
        )
        factory = optimization.OptimizerFactory(stage_params)
        return factory.build_optimizer(factory.build_learning_rate())
Example #8
0
    def create_optimizer(
            cls,
            optimizer_config: OptimizationConfig,
            runtime_config: Optional[RuntimeConfig] = None,
            dp_config: Optional[DifferentialPrivacyConfig] = None):
        """Creates a TF optimizer from configurations.

    Args:
      optimizer_config: the parameters of the Optimization settings.
      runtime_config: the parameters of the runtime.
      dp_config: the parameter of differential privacy.

    Returns:
      A tf.optimizers.Optimizer object.
    """
        transformers = None
        if dp_config is not None:
            logging.info(
                "Adding differential privacy transform with config %s.",
                dp_config.as_dict())
            stddev = dp_config.clipping_norm * dp_config.noise_multiplier
            # Clip per-example gradients, then add calibrated Gaussian noise.
            transformers = [
                functools.partial(ops.clip_l2_norm,
                                  l2_norm_clip=dp_config.clipping_norm),
                functools.partial(ops.add_noise, noise_stddev=stddev),
            ]

        factory = optimization.OptimizerFactory(optimizer_config)
        optimizer = factory.build_optimizer(
            factory.build_learning_rate(),
            gradient_transformers=transformers)
        # Wrap with loss scaling when a runtime config is given; this keeps
        # float16 computations from overflowing/underflowing.
        if runtime_config:
            optimizer = performance.configure_optimizer(
                optimizer,
                use_float16=runtime_config.mixed_precision_dtype == "float16",
                loss_scale=runtime_config.loss_scale)

        return optimizer
Example #9
0
def create_optimizer(trainer_config: TrainerConfig,
                     runtime_config: Optional[RuntimeConfig] = None):
    """Creates a TF optimizer from configurations.

  Args:
    trainer_config: the parameters of the trainer.
    runtime_config: the parameters of the runtime.

  Returns:
    A tf.optimizers.Optimizer object.
  """
    factory = optimization.OptimizerFactory(trainer_config.optimizer_config)
    optimizer = factory.build_optimizer(factory.build_learning_rate())
    # Loss scaling guards float16 training against overflow/underflow.
    if runtime_config and runtime_config.loss_scale:
        optimizer = performance.configure_optimizer(
            optimizer,
            use_float16=runtime_config.mixed_precision_dtype == "float16",
            loss_scale=runtime_config.loss_scale)
    return optimizer
Example #10
0
    def test_task(self, config_name):
        """Runs one train and one validation step of the EdgeTPU cls task."""
        config = exp_factory.get_exp_config(config_name)
        config.task.train_data.global_batch_size = 2

        task = image_classification.EdgeTPUTask(config.task)
        model = task.build_model()
        metrics = task.build_metrics()

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), task.build_inputs,
            config.task.train_data)
        batch_iter = iter(dataset)

        factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = factory.build_optimizer(factory.build_learning_rate())
        # EMA optimizers must shadow the model weights before the first step.
        if (isinstance(optimizer, optimization.ExponentialMovingAverage)
                and not optimizer.has_shadow_copy):
            optimizer.shadow_copy(model)

        train_logs = task.train_step(
            next(batch_iter), model, optimizer, metrics=metrics)
        for m in metrics:
            train_logs[m.name] = m.result()
        for key in ('loss', 'accuracy', 'top_5_accuracy'):
            self.assertIn(key, train_logs)

        val_logs = task.validation_step(
            next(batch_iter), model, metrics=metrics)
        for m in metrics:
            val_logs[m.name] = m.result()
        for key in ('loss', 'accuracy', 'top_5_accuracy'):
            self.assertIn(key, val_logs)
Example #11
0
    def get_optimizer(self, stage_id):
        """Builds the optimizer for a progressive-distillation stage.

        Args:
          stage_id: index of the current stage; all stages but the last use
            the layer-wise distillation config, the last uses the pretrain
            distillation config.

        Returns:
          An optimizer built from the stage-specific schedule.
        """
        if stage_id + 1 < self.num_stages():
            distill_config = self._progressive_config.layer_wise_distill_config
        else:
            distill_config = self._progressive_config.pretrain_distill_config

        stage_params = self._optimizer_config.replace(
            learning_rate={
                'polynomial': {
                    'decay_steps': distill_config.decay_steps,
                    'initial_learning_rate':
                        distill_config.initial_learning_rate,
                    'end_learning_rate': distill_config.end_learning_rate,
                }
            },
            warmup={'linear': {
                'warmup_steps': distill_config.warmup_steps,
            }})
        factory = optimization.OptimizerFactory(stage_params)
        return factory.build_optimizer(factory.build_learning_rate())
    def _build_and_run_model(self, config):
        """Trains five steps and validates once.

        Args:
          config: experiment config providing the task and optimizer settings.

        Returns:
          A (train_logs, validation_logs, model_weights) tuple.
        """
        task = image_classification.ImageClassificationTask(config.task)
        model = task.build_model()
        metrics = task.build_metrics()

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), task.build_inputs,
            config.task.train_data)
        batch_iter = iter(dataset)

        factory = optimization.OptimizerFactory(
            config.trainer.optimizer_config)
        optimizer = factory.build_optimizer(factory.build_learning_rate())
        # EMA optimizers must shadow the model weights before the first step.
        if (isinstance(optimizer, optimization.ExponentialMovingAverage)
                and not optimizer.has_shadow_copy):
            optimizer.shadow_copy(model)

        # Run training.
        for _ in range(5):
            logs = task.train_step(
                next(batch_iter), model, optimizer, metrics=metrics)
        for metric in metrics:
            logs[metric.name] = metric.result()

        # Run validation.
        validation_logs = task.validation_step(
            next(batch_iter), model, metrics=metrics)
        for metric in metrics:
            validation_logs[metric.name] = metric.result()

        return logs, validation_logs, model.weights
Example #13
0
    def __init__(self,
                 config: ExperimentConfig,
                 task: base_task.Task,
                 train: bool = True,
                 evaluate: bool = True,
                 model=None,
                 optimizer=None,
                 checkpoint_exporter=None):
        """Initialize common trainer for TensorFlow models.

    Args:
      config: An `ExperimentConfig` instance specifying experiment config.
      task: A base_task.Task instance.
      train: bool, whether or not this trainer will be used for training.
        default to True.
      evaluate: bool, whether or not this trainer will be used for evaluation.
        default to True.
      model: tf.keras.Model instance. If provided, it will be used instead of
        building model using task.build_model(). Default to None.
      optimizer: tf.keras.optimizers.Optimizer instance. If provided, it will
        used instead of the optimizer from config. Default to None.
      checkpoint_exporter: an object that has the `maybe_export_checkpoint`
        interface.
    """
        # Gets the current distribution strategy. If not inside any strategy scope,
        # it gets a single-replica no-op strategy.
        self._strategy = tf.distribute.get_strategy()
        self._config = config
        self._task = task

        # Caller-supplied model takes precedence over task.build_model().
        self._model = model or task.build_model()

        # Build the optimizer from config only when the caller did not pass one.
        if optimizer is None:
            opt_factory = optimization.OptimizerFactory(
                config.trainer.optimizer_config)
            self._optimizer = opt_factory.build_optimizer(
                opt_factory.build_learning_rate())
        else:
            self._optimizer = optimizer

        self._checkpoint_exporter = checkpoint_exporter

        # Configuring optimizer when loss_scale is set in runtime config. This helps
        # avoiding overflow/underflow for float16 computations.
        # NOTE: this must happen before the checkpoint below captures the
        # optimizer, so the wrapped optimizer is what gets checkpointed.
        if config.runtime.loss_scale:
            self._optimizer = performance.configure_optimizer(
                self._optimizer,
                use_float16=config.runtime.mixed_precision_dtype == 'float16',
                loss_scale=config.runtime.loss_scale)

        # global_step increases by 1 after each training iteration.
        # We should have global_step.numpy() == self.optimizer.iterations.numpy()
        # when there is only 1 optimizer.
        self._global_step = orbit.utils.create_global_step()
        # Models may expose extra trackables (e.g. sub-modules) to checkpoint.
        if hasattr(self.model, 'checkpoint_items'):
            checkpoint_items = self.model.checkpoint_items
        else:
            checkpoint_items = {}
        self._checkpoint = tf.train.Checkpoint(global_step=self.global_step,
                                               model=self.model,
                                               optimizer=self.optimizer,
                                               **checkpoint_items)

        # Running means of the train/validation losses, reset by the loops.
        self._train_loss = tf.keras.metrics.Mean('training_loss',
                                                 dtype=tf.float32)
        self._validation_loss = tf.keras.metrics.Mean('validation_loss',
                                                      dtype=tf.float32)
        # Task metrics are combined with any metrics the model itself tracks.
        self._train_metrics = self.task.build_metrics(
            training=True) + self.model.metrics
        self._validation_metrics = self.task.build_metrics(
            training=False) + self.model.metrics

        # Initialize the orbit trainer/evaluator mixins only for the roles
        # this instance will actually serve.
        if train:
            train_dataset = orbit.utils.make_distributed_dataset(
                self.strategy, self.task.build_inputs,
                self.config.task.train_data)
            orbit.StandardTrainer.__init__(
                self,
                train_dataset,
                options=orbit.StandardTrainerOptions(
                    use_tf_while_loop=config.trainer.train_tf_while_loop,
                    use_tf_function=config.trainer.train_tf_function,
                    use_tpu_summary_optimization=config.trainer.
                    allow_tpu_summary))

        if evaluate:
            eval_dataset = orbit.utils.make_distributed_dataset(
                self.strategy, self.task.build_inputs,
                self.config.task.validation_data)
            orbit.StandardEvaluator.__init__(
                self,
                eval_dataset,
                options=orbit.StandardEvaluatorOptions(
                    use_tf_function=config.trainer.eval_tf_function))