Example No. 1
  def test_config_override(self):
    # Define several dummy flags which are called by the utils.config_override
    # function.
    flags.DEFINE_string('tpu', None, 'tpu_address.')
    flags.DEFINE_list('config_file', [], 'A list of config file paths.')
    flags.DEFINE_string('params_override', 'orbit_config.mode=eval',
                        'Override params.')
    flags.DEFINE_string('model_dir', '/tmp/', 'Model saving directory.')
    flags.DEFINE_list('mode', ['train'], 'Job mode.')
    flags.DEFINE_bool(
        'use_vizier', False,
        'Whether to enable vizier based hyperparameter search.')
    experiment_params = params.EdgeTPUBERTCustomParams()
    # By default, the orbit config is set to train mode.
    self.assertEqual(experiment_params.orbit_config.mode, 'train')
    # The config override should set the orbit config to eval mode.
    experiment_params = utils.config_override(experiment_params, FLAGS)
    self.assertEqual(experiment_params.orbit_config.mode, 'eval')
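
The utils.config_override helper itself is not shown in these examples. As a rough sketch of the --params_override handling this test exercises, a hypothetical helper might look like the following (apply_params_override and its dotted-path parsing are illustrative assumptions, not the project's actual implementation):

def apply_params_override(params, params_override):
    """Applies 'a.b.c=value' style overrides to a nested params object."""
    for assignment in params_override.split(','):
        dotted_path, value = assignment.split('=', 1)
        *parents, leaf = dotted_path.strip().split('.')
        node = params
        for name in parents:
            node = getattr(node, name)  # Walk down, e.g. params.orbit_config.
        setattr(node, leaf, value)  # e.g. orbit_config.mode = 'eval'.
    return params

With the flags above, apply_params_override(experiment_params, 'orbit_config.mode=eval') would produce the mode change the assertions verify.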
Example No. 2
  def test_config_override(self):
    # Define several dummy flags which are called by the utils.config_override
    # function.
    file_path = 'third_party/tensorflow_models/official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_m.yaml'
    flags.DEFINE_string('tpu', None, 'tpu_address.')
    flags.DEFINE_list('config_file', [file_path],
                      'A list of config file paths.')
    flags.DEFINE_string('params_override', None, 'Override params.')
    flags.DEFINE_string('model_dir', '/tmp/', 'Model saving directory.')
    flags.DEFINE_list('mode', ['train'], 'Job mode.')
    flags.DEFINE_bool('use_vizier', False,
                      'Whether to enable vizier based hyperparameter search.')
    experiment_params = params.EdgeTPUBERTCustomParams()
    experiment_params = utils.config_override(experiment_params, FLAGS)
    experiment_params_dict = experiment_params.as_dict()

    with tf.io.gfile.GFile(file_path, 'r') as f:
      loaded_dict = yaml.load(f, Loader=yaml.FullLoader)

    # experiment_params contains all the configs, but the loaded_dict might
    # only contain a subset of them.
    self.assertTrue(nested_dict_compare(loaded_dict, experiment_params_dict))
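
nested_dict_compare is a test helper that the snippet does not define. A minimal sketch of the recursive subset comparison the assertion implies (this implementation is an assumption, not the project's helper):

def nested_dict_compare(subset, superset):
    """Returns True if every (possibly nested) item of `subset` is in `superset`."""
    for key, value in subset.items():
        if key not in superset:
            return False
        if isinstance(value, dict) and isinstance(superset[key], dict):
            if not nested_dict_compare(value, superset[key]):
                return False
        elif superset[key] != value:
            return False
    return True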
Example No. 3
def main(argv: Sequence[str]) -> None:
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Set up experiment params and load the configs from file/files.
    experiment_params = params.EdgeTPUBERTCustomParams()
    experiment_params = utils.config_override(experiment_params, FLAGS)

    # Change the input mask dtype to tf.float32 to avoid an additional casting op.
    experiment_params.student_model.encoder.mobilebert.input_mask_dtype = 'float32'

    # Experiments indicate that using -120 as the softmax mask value is good
    # enough for both int8 and bfloat16, so we set quantization_friendly to
    # True for both the quantized and the float model.
    pretrainer_model = model_builder.build_bert_pretrainer(
        experiment_params.student_model,
        name='pretrainer',
        quantization_friendly=True)

    encoder_network = pretrainer_model.encoder_network
    model = models.BertSpanLabeler(
        network=encoder_network,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))

    # Load model weights.
    if FLAGS.model_checkpoint is not None:
        checkpoint_dict = {'model': model}
        checkpoint = tf.train.Checkpoint(**checkpoint_dict)
        checkpoint.restore(
            FLAGS.model_checkpoint).assert_existing_objects_matched()

    model_for_serving = build_model_for_serving(model)
    model_for_serving.summary()

    # TODO(b/194449109): Need to save the model to file and then convert tflite
    # with 'tf.lite.TFLiteConverter.from_saved_model()' to get the expected
    # accuracy
    # Use mkdtemp() here: tempfile.TemporaryDirectory().name returns a path
    # whose directory is deleted as soon as the object is garbage collected.
    tmp_dir = tempfile.mkdtemp()
    model_for_serving.save(tmp_dir)

    def _representative_dataset():
        dataset_params = question_answering_dataloader.QADataConfig()
        dataset_params.input_path = SQUAD_TRAIN_SPLIT
        dataset_params.drop_remainder = False
        dataset_params.global_batch_size = 1
        dataset_params.is_training = True

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), build_inputs, dataset_params)
        for example in dataset.take(100):
            inputs = example[0]
            input_word_ids = inputs['input_word_ids']
            input_mask = inputs['input_mask']
            input_type_ids = inputs['input_type_ids']
            yield [input_word_ids, input_mask, input_type_ids]

    converter = tf.lite.TFLiteConverter.from_saved_model(tmp_dir)
    if FLAGS.quantization_method in ['full-integer', 'hybrid']:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
    if FLAGS.quantization_method in ['full-integer']:
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.float32
        converter.representative_dataset = _representative_dataset

    tflite_quant_model = converter.convert()
    export_model_path = os.path.join(FLAGS.export_path, 'model.tflite')
    with tf.io.gfile.GFile(export_model_path, 'wb') as f:
        f.write(tflite_quant_model)
    logging.info('Successfully saved the tflite model to %s', FLAGS.export_path)
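
The exported file can be smoke-tested with the standard tf.lite.Interpreter API. This check is not part of the original script, and the model path below is a placeholder:

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='/tmp/export/model.tflite')  # Placeholder path.
interpreter.allocate_tensors()
# Feed zeros with whatever shapes/dtypes the converted model declares.
for detail in interpreter.get_input_details():
    dummy = np.zeros(detail['shape'], dtype=detail['dtype'])
    interpreter.set_tensor(detail['index'], dummy)
interpreter.invoke()
for detail in interpreter.get_output_details():
    print(detail['name'], interpreter.get_tensor(detail['index']).shape)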
Example No. 4
def main(_):

    # Set up experiment params and load the configs from file/files.
    experiment_params = params.EdgeTPUBERTCustomParams()
    experiment_params = utils.config_override(experiment_params, FLAGS)
    model_dir = utils.get_model_dir(experiment_params, FLAGS)

    distribution_strategy = distribute_utils.get_distribution_strategy(
        distribution_strategy=experiment_params.runtime.distribution_strategy,
        all_reduce_alg=experiment_params.runtime.all_reduce_alg,
        num_gpus=experiment_params.runtime.num_gpus,
        tpu_address=experiment_params.runtime.tpu_address)

    with distribution_strategy.scope():
        teacher_model = model_builder.build_bert_pretrainer(
            pretrainer_cfg=experiment_params.teacher_model,
            quantization_friendly=False,
            name='teacher')
        student_model = model_builder.build_bert_pretrainer(
            pretrainer_cfg=experiment_params.student_model,
            quantization_friendly=True,
            name='student')

        # Load model weights.
        teacher_ckpt_dir_or_file = experiment_params.teacher_model_init_checkpoint
        if not teacher_ckpt_dir_or_file:
            raise ValueError(
                '`teacher_model_init_checkpoint` is not specified.')
        utils.load_checkpoint(teacher_model, teacher_ckpt_dir_or_file)

        student_ckpt_dir_or_file = experiment_params.student_model_init_checkpoint
        if not student_ckpt_dir_or_file:
            # Makes sure the pretrainer variables are created.
            _ = student_model(student_model.inputs)
            logging.warning(
                'No student checkpoint is provided; training might take '
                'much longer before converging.')
        else:
            utils.load_checkpoint(student_model, student_ckpt_dir_or_file)

        runner = mobilebert_edgetpu_trainer.MobileBERTEdgeTPUDistillationTrainer(
            teacher_model=teacher_model,
            student_model=student_model,
            strategy=distribution_strategy,
            experiment_params=experiment_params,
            export_ckpt_path=model_dir)

        # Save a checkpoint for preemption handling.
        # Checkpoints for downstream tasks are saved separately inside the
        # runner's train_loop_end() function.
        checkpoint = tf.train.Checkpoint(
            teacher_model=runner.teacher_model,
            student_model=runner.student_model,
            layer_wise_optimizer=runner.layer_wise_optimizer,
            e2e_optimizer=runner.e2e_optimizer,
            current_step=runner.current_step)
        checkpoint_manager = tf.train.CheckpointManager(
            checkpoint,
            directory=model_dir,
            max_to_keep=5,
            step_counter=runner.current_step,
            checkpoint_interval=20000,
            init_fn=None)

    controller = orbit.Controller(
        trainer=runner,
        evaluator=runner,
        global_step=runner.current_step,
        strategy=distribution_strategy,
        steps_per_loop=experiment_params.orbit_config.steps_per_loop,
        summary_dir=os.path.join(model_dir, 'train'),
        eval_summary_dir=os.path.join(model_dir, 'eval'),
        checkpoint_manager=checkpoint_manager)

    if FLAGS.mode == 'train':
        controller.train(steps=experiment_params.orbit_config.total_steps)
    else:
        raise ValueError('Unsupported mode; only `train` is supported.')
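
utils.load_checkpoint is a project helper whose body is not shown here. A minimal sketch of what such a restore might do with standard tf.train APIs (the 'model' key passed to tf.train.Checkpoint is an assumed mapping, not necessarily the one the project uses):

import tensorflow as tf

def load_checkpoint(model, ckpt_dir_or_file):
    """Restores `model` from a checkpoint file, or the latest one in a directory."""
    if tf.io.gfile.isdir(ckpt_dir_or_file):
        ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
    checkpoint = tf.train.Checkpoint(model=model)  # Assumed key name.
    # A partial restore is tolerated; assert only that matched objects loaded.
    checkpoint.restore(ckpt_dir_or_file).assert_existing_objects_matched()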