def test_task(self):
  config = masked_lm.MaskedLMConfig(
      init_checkpoint=self.get_temp_dir(),
      scale_loss=True,
      model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              bert=encoders.BertEncoderConfig(vocab_size=30522,
                                              num_layers=1)),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence")
          ]),
      train_data=pretrain_dataloader.BertPretrainDataConfig(
          input_path="dummy",
          max_predictions_per_seq=20,
          seq_length=128,
          global_batch_size=1))
  task = masked_lm.MaskedLMTask(config)
  model = task.build_model()
  metrics = task.build_metrics()
  dataset = task.build_inputs(config.train_data)

  # Runs one train and one validation step on the dummy data.
  iterator = iter(dataset)
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
  task.train_step(next(iterator), model, optimizer, metrics=metrics)
  task.validation_step(next(iterator), model, metrics=metrics)

  # Saves a checkpoint.
  ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
  ckpt.save(config.init_checkpoint)
  task.initialize(model)
def bert_pretraining() -> cfg.ExperimentConfig:
  """BERT pretraining experiment."""
  config = cfg.ExperimentConfig(
      task=masked_lm.MaskedLMConfig(
          train_data=pretrain_dataloader.BertPretrainDataConfig(),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              is_training=False)),
      trainer=cfg.TrainerConfig(
          train_steps=1000000,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 1e-4,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
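# The factory above is normally registered with the Model Garden's experiment
# registry so a training driver can build the config by name. A minimal sketch
# of that pattern; the experiment name 'bert/pretraining' is used here only for
# illustration and is an assumption, not necessarily the registered name.
from official.core import exp_factory

exp_factory.register_config_factory('bert/pretraining')(bert_pretraining)

# A driver can then recover the full ExperimentConfig by name:
# experiment_config = exp_factory.get_exp_config('bert/pretraining')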
def test_task_determinism(self):
  config = masked_lm.MaskedLMConfig(
      init_checkpoint=self.get_temp_dir(),
      scale_loss=True,
      model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              bert=encoders.BertEncoderConfig(vocab_size=30522,
                                              num_layers=1)),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence")
          ]),
      train_data=pretrain_dataloader.BertPretrainDataConfig(
          max_predictions_per_seq=20, seq_length=128, global_batch_size=1))

  tf.keras.utils.set_random_seed(1)
  logs1, validation_logs1, weights1 = self._build_and_run_model(config)
  tf.keras.utils.set_random_seed(1)
  logs2, validation_logs2, weights2 = self._build_and_run_model(config)

  self.assertEqual(logs1["loss"], logs2["loss"])
  self.assertEqual(validation_logs1["loss"], validation_logs2["loss"])
  for weight1, weight2 in zip(weights1, weights2):
    self.assertAllEqual(weight1, weight2)
def roformer_pretraining() -> cfg.ExperimentConfig:
  """RoFormer pretraining experiment."""
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=masked_lm.MaskedLMConfig(
          model=bert.PretrainerConfig(
              encoder=encoders.EncoderConfig(
                  type='any', any=roformer.RoformerEncoderConfig()),
              cls_heads=[
                  bert.ClsHeadConfig(
                      inner_dim=768,
                      num_classes=2,
                      dropout_rate=0.1,
                      name='next_sentence')
              ]),
          train_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True, is_training=False)),
      trainer=cfg.TrainerConfig(
          optimizer_config=RoformerOptimizationConfig(), train_steps=1000000),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def bert_pretraining() -> cfg.ExperimentConfig:
  """BERT pretraining experiment."""
  config = cfg.ExperimentConfig(
      task=masked_lm.MaskedLMConfig(
          train_data=pretrain_dataloader.BertPretrainDataConfig(),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              is_training=False)),
      trainer=_TRAINER,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
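# `_TRAINER` is referenced by this experiment (and by bert_dynamic below) but
# its definition lies outside this excerpt. A plausible module-level sketch,
# mirroring the inline trainer used by the other bert_pretraining variant above;
# the hyperparameter values are taken from that variant and are assumptions,
# not necessarily the values of the real constant.
_TRAINER = cfg.TrainerConfig(
    train_steps=1000000,
    optimizer_config=optimization.OptimizationConfig({
        'optimizer': {
            'type': 'adamw',
            'adamw': {
                'weight_decay_rate': 0.01,
                'exclude_from_weight_decay':
                    ['LayerNorm', 'layer_norm', 'bias'],
            }
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {
                'initial_learning_rate': 1e-4,
                'end_learning_rate': 0.0,
            }
        },
        'warmup': {
            'type': 'polynomial'
        }
    }))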
def test_masked_lm(self, use_v2_feature_names):
  if use_v2_feature_names:
    input_word_ids_field = "input_word_ids"
    input_type_ids_field = "input_type_ids"
  else:
    input_word_ids_field = "input_ids"
    input_type_ids_field = "segment_ids"

  config = masked_lm.MaskedLMConfig(
      model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              bert=encoders.BertEncoderConfig(vocab_size=30522,
                                              num_layers=1)),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence")
          ]))
  task = masked_lm.MaskedLMTask(config)
  model = task.build_model()
  params = serving_modules.MaskedLM.Params(
      parse_sequence_length=10,
      max_predictions_per_seq=5,
      use_v2_feature_names=use_v2_feature_names)
  export_module = serving_modules.MaskedLM(params=params, model=model)
  functions = export_module.get_inference_signatures({
      "serve": "serving_default",
      "serve_examples": "serving_examples"
  })
  self.assertSameElements(functions.keys(),
                          ["serving_default", "serving_examples"])

  dummy_ids = tf.ones((10, 10), dtype=tf.int32)
  dummy_pos = tf.ones((10, 5), dtype=tf.int32)
  outputs = functions["serving_default"](
      input_word_ids=dummy_ids,
      input_mask=dummy_ids,
      input_type_ids=dummy_ids,
      masked_lm_positions=dummy_pos)
  self.assertEqual(outputs["classification"].shape, (10, 2))

  dummy_ids = tf.ones((10,), dtype=tf.int32)
  dummy_pos = tf.ones((5,), dtype=tf.int32)
  examples = _create_fake_serialized_examples({
      input_word_ids_field: dummy_ids,
      "input_mask": dummy_ids,
      input_type_ids_field: dummy_ids,
      "masked_lm_positions": dummy_pos
  })
  outputs = functions["serving_examples"](examples)
  self.assertEqual(outputs["classification"].shape, (10, 2))
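# `_create_fake_serialized_examples` is a test helper not included in this
# excerpt. A plausible sketch that matches how it is used above (one
# tf.train.Example built from the int32 feature tensors, repeated to the batch
# of 10 that the assertions expect); this is an assumed implementation, not the
# original helper, and it relies on `import tensorflow as tf` as elsewhere in
# the file.
def _create_fake_serialized_examples(feature_dict):
  """Serializes `feature_dict` into a batch of 10 identical tf.train.Examples."""
  features = {}
  for name, tensor in feature_dict.items():
    values = tf.reshape(tensor, [-1]).numpy().tolist()
    features[name] = tf.train.Feature(
        int64_list=tf.train.Int64List(value=values))
  example = tf.train.Example(features=tf.train.Features(feature=features))
  return tf.constant([example.SerializeToString()] * 10)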
def bert_dynamic() -> cfg.ExperimentConfig:
  """BERT base with dynamic input sequences.

  TPU needs to run with tf.data service with round-robin behavior.
  """
  config = cfg.ExperimentConfig(
      task=masked_lm.MaskedLMConfig(
          train_data=pretrain_dynamic_dataloader.BertPretrainDataConfig(),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              is_training=False)),
      trainer=_TRAINER,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def test_masked_lm(self):
  config = masked_lm.MaskedLMConfig(
      model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              bert=encoders.BertEncoderConfig(vocab_size=30522,
                                              num_layers=1)),
          cls_heads=[
              bert.ClsHeadConfig(inner_dim=10, num_classes=2, name="foo")
          ]))
  task = masked_lm.MaskedLMTask(config)
  model = task.build_model()
  ckpt = tf.train.Checkpoint(model=model)
  ckpt_path = ckpt.save(self.get_temp_dir())
  export_module_cls = export_savedmodel.lookup_export_module(task)
  serving_params = {
      "cls_head_name": "foo",
      "parse_sequence_length": 10,
      "max_predictions_per_seq": 5
  }
  params = export_module_cls.Params(**serving_params)
  export_module = export_module_cls(params=params, model=model)
  export_dir = export_savedmodel_util.export(
      export_module,
      function_keys={
          "serve": "serving_default",
          "serve_examples": "serving_examples"
      },
      checkpoint_path=ckpt_path,
      export_savedmodel_dir=self.get_temp_dir())

  imported = tf.saved_model.load(export_dir)
  self.assertSameElements(imported.signatures.keys(),
                          ["serving_default", "serving_examples"])

  serving_fn = imported.signatures["serving_default"]
  dummy_ids = tf.ones((1, 10), dtype=tf.int32)
  dummy_pos = tf.ones((1, 5), dtype=tf.int32)
  outputs = serving_fn(
      input_word_ids=dummy_ids,
      input_mask=dummy_ids,
      input_type_ids=dummy_ids,
      masked_lm_positions=dummy_pos)
  self.assertEqual(outputs["classification"].shape, (1, 2))
def test_task(self):
  config = masked_lm.MaskedLMConfig(
      model=bert.BertPretrainerConfig(
          encoders.TransformerEncoderConfig(vocab_size=30522, num_layers=1),
          num_masked_tokens=20,
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence")
          ]),
      train_data=bert.BertPretrainDataConfig(
          input_path="dummy",
          max_predictions_per_seq=20,
          seq_length=128,
          global_batch_size=1))
  task = masked_lm.MaskedLMTask(config)
  model = task.build_model()
  metrics = task.build_metrics()
  dataset = task.build_inputs(config.train_data)

  iterator = iter(dataset)
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
  task.train_step(next(iterator), model, optimizer, metrics=metrics)
  task.validation_step(next(iterator), model, metrics=metrics)
def test_distribution_strategy(self, distribution_strategy):
  max_seq_length = 128
  batch_size = 8
  input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
  _create_fake_dataset(
      input_path,
      seq_length=60,
      num_masked_tokens=20,
      max_seq_length=max_seq_length,
      num_examples=batch_size)
  data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig(
      is_training=False,
      input_path=input_path,
      seq_bucket_lengths=[64, 128],
      global_batch_size=batch_size)
  dataloader = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader(
      data_config)
  distributed_ds = orbit.utils.make_distributed_dataset(
      distribution_strategy, dataloader.load)
  train_iter = iter(distributed_ds)
  with distribution_strategy.scope():
    config = masked_lm.MaskedLMConfig(
        init_checkpoint=self.get_temp_dir(),
        model=bert.PretrainerConfig(
            encoders.EncoderConfig(
                bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                num_layers=1)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=10, num_classes=2, name='next_sentence')
            ]),
        train_data=data_config)
    task = masked_lm.MaskedLMTask(config)
    model = task.build_model()
    metrics = task.build_metrics()

  @tf.function
  def step_fn(features):
    return task.validation_step(features, model, metrics=metrics)

  distributed_outputs = distribution_strategy.run(
      step_fn, args=(next(train_iter),))
  local_results = tf.nest.map_structure(
      distribution_strategy.experimental_local_results, distributed_outputs)
  logging.info('Dynamic padding: local_results= %s', str(local_results))
  dynamic_metrics = {}
  for metric in metrics:
    dynamic_metrics[metric.name] = metric.result()

  data_config = pretrain_dataloader.BertPretrainDataConfig(
      is_training=False,
      input_path=input_path,
      seq_length=max_seq_length,
      max_predictions_per_seq=20,
      global_batch_size=batch_size)
  dataloader = pretrain_dataloader.BertPretrainDataLoader(data_config)
  distributed_ds = orbit.utils.make_distributed_dataset(
      distribution_strategy, dataloader.load)
  train_iter = iter(distributed_ds)
  with distribution_strategy.scope():
    metrics = task.build_metrics()

  @tf.function
  def step_fn_b(features):
    return task.validation_step(features, model, metrics=metrics)

  distributed_outputs = distribution_strategy.run(
      step_fn_b, args=(next(train_iter),))
  local_results = tf.nest.map_structure(
      distribution_strategy.experimental_local_results, distributed_outputs)
  logging.info('Static padding: local_results= %s', str(local_results))
  static_metrics = {}
  for metric in metrics:
    static_metrics[metric.name] = metric.result()

  for key in static_metrics:
    # We need to investigate the differences on losses.
    if key != 'next_sentence_loss':
      self.assertEqual(dynamic_metrics[key], static_metrics[key])