def create_lm_model(self,
                    vocab_size,
                    sequence_length,
                    hidden_size,
                    num_predictions,
                    output="predictions"):
  # First, create a transformer stack that we can use to get the LM's
  # vocabulary weight.
  xformer_stack = networks.BertEncoder(
      vocab_size=vocab_size,
      num_layers=1,
      sequence_length=sequence_length,
      hidden_size=hidden_size,
      num_attention_heads=4,
  )
  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  _ = xformer_stack([word_ids, mask, type_ids])

  # Create a MaskedLM from the transformer stack.
  test_layer = layers.MaskedLM(
      embedding_table=xformer_stack.get_embedding_table(), output=output)

  # Create a model from the masked LM layer.
  lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
  masked_lm_positions = tf.keras.Input(
      shape=(num_predictions,), dtype=tf.int32)
  output = test_layer(lm_input_tensor, masked_positions=masked_lm_positions)
  return tf.keras.Model([lm_input_tensor, masked_lm_positions], output)
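# A minimal usage sketch for the helper above (an addition, not part of the
# original test file): run random activations and positions through the
# returned model and check the output shape. The batch size of 3 and the
# other sizes are arbitrary assumptions.
import numpy as np

model = self.create_lm_model(
    vocab_size=100, sequence_length=32, hidden_size=64, num_predictions=21)
lm_input_data = 10 * np.random.random_sample((3, 32, 64))
masked_position_data = np.random.randint(32, size=(3, 21))
outputs = model.predict([lm_input_data, masked_position_data])
# With the default output="predictions", MaskedLM emits log-probabilities of
# shape (batch, num_predictions, vocab_size), i.e. (3, 21, 100) here.
self.assertEqual((3, 21, 100), outputs.shape)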
def __init__(
    self,
    encoder_network: tf.keras.Model,
    mlm_activation=None,
    mlm_initializer='glorot_uniform',
    classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
    customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
    name: str = 'bert',
    **kwargs):
  # Zero-argument super() already binds `self`; passing it again is an error.
  super().__init__(name=name, **kwargs)
  self._config = {
      'encoder_network': encoder_network,
      'mlm_initializer': mlm_initializer,
      'classification_heads': classification_heads,
      'name': name,
  }

  self.encoder_network = encoder_network
  inputs = copy.copy(self.encoder_network.inputs)
  self.classification_heads = classification_heads or []
  if len(set([cls.name for cls in self.classification_heads])) != len(
      self.classification_heads):
    raise ValueError('Classification heads should have unique names.')

  self.masked_lm = customized_masked_lm or layers.MaskedLM(
      embedding_table=self.encoder_network.get_embedding_table(),
      activation=mlm_activation,
      initializer=mlm_initializer,
      name='cls/predictions')

  masked_lm_positions = tf.keras.layers.Input(
      shape=(None,), name='masked_lm_positions', dtype=tf.int32)
  inputs.append(masked_lm_positions)
  self.inputs = inputs
def __init__(
    self,
    encoder_network: tf.keras.Model,
    mlm_activation=None,
    mlm_initializer='glorot_uniform',
    classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
    name: str = 'bert',
    **kwargs):
  self._self_setattr_tracking = False
  self._config = {
      'encoder_network': encoder_network,
      'mlm_initializer': mlm_initializer,
      'classification_heads': classification_heads,
      'name': name,
  }
  self.encoder_network = encoder_network
  inputs = copy.copy(self.encoder_network.inputs)
  outputs = dict()
  encoder_network_outputs = self.encoder_network(inputs)
  if isinstance(encoder_network_outputs, list):
    outputs['pooled_output'] = encoder_network_outputs[1]
    # When `encoder_network` was instantiated with return_all_encoder_outputs
    # set to True, `encoder_network_outputs[0]` is a list containing
    # all transformer layers' output.
    if isinstance(encoder_network_outputs[0], list):
      outputs['encoder_outputs'] = encoder_network_outputs[0]
      outputs['sequence_output'] = encoder_network_outputs[0][-1]
    else:
      outputs['sequence_output'] = encoder_network_outputs[0]
  elif isinstance(encoder_network_outputs, dict):
    outputs = encoder_network_outputs
  else:
    raise ValueError('encoder_network\'s output should be either a list '
                     'or a dict, but got %s' % encoder_network_outputs)
  sequence_output = outputs['sequence_output']

  self.classification_heads = classification_heads or []
  if len(set([cls.name for cls in self.classification_heads])) != len(
      self.classification_heads):
    raise ValueError('Classification heads should have unique names.')

  self.masked_lm = layers.MaskedLM(
      embedding_table=self.encoder_network.get_embedding_table(),
      activation=mlm_activation,
      initializer=mlm_initializer,
      name='cls/predictions')
  masked_lm_positions = tf.keras.layers.Input(
      shape=(None,), name='masked_lm_positions', dtype=tf.int32)
  inputs.append(masked_lm_positions)
  outputs['mlm_logits'] = self.masked_lm(
      sequence_output, masked_positions=masked_lm_positions)

  for cls_head in self.classification_heads:
    outputs[cls_head.name] = cls_head(sequence_output)

  super(BertPretrainerV2, self).__init__(
      inputs=inputs, outputs=outputs, name=name, **kwargs)
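# A minimal end-to-end sketch (an addition, not from the source) showing how
# BertPretrainerV2 is typically wired to an encoder and invoked; all sizes
# below are arbitrary assumptions.
import numpy as np

encoder = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=2)
pretrainer = BertPretrainerV2(encoder_network=encoder)
batch, seq_len, num_preds = 2, 16, 4
data = dict(
    input_word_ids=np.random.randint(
        100, size=(batch, seq_len)).astype(np.int32),
    input_mask=np.ones((batch, seq_len), dtype=np.int32),
    input_type_ids=np.zeros((batch, seq_len), dtype=np.int32),
    masked_lm_positions=np.random.randint(
        seq_len, size=(batch, num_preds)).astype(np.int32))
outputs = pretrainer(data)
# outputs['mlm_logits'] has shape (batch, num_preds, vocab_size);
# outputs['sequence_output'] has shape (batch, seq_len, hidden_size).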
def test_bert_pretrainerv2(self, dict_outputs, return_all_encoder_outputs,
                           use_customized_masked_lm):
  """Validate that the Keras object can be created."""
  # Build a transformer network to use within the BERT trainer.
  vocab_size = 100
  sequence_length = 512
  hidden_size = 48
  num_layers = 2
  test_network = networks.BertEncoder(
      vocab_size=vocab_size,
      num_layers=num_layers,
      hidden_size=hidden_size,
      max_sequence_length=sequence_length,
      return_all_encoder_outputs=return_all_encoder_outputs,
      dict_outputs=dict_outputs)

  # Create a BERT trainer with the created network.
  if use_customized_masked_lm:
    customized_masked_lm = layers.MaskedLM(
        embedding_table=test_network.get_embedding_table())
  else:
    customized_masked_lm = None
  bert_trainer_model = bert_pretrainer.BertPretrainerV2(
      encoder_network=test_network,
      customized_masked_lm=customized_masked_lm)
  num_token_predictions = 20

  # Create a set of 2-dimensional inputs (the first dimension is implicit).
  inputs = dict(
      input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
      input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
      input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
      masked_lm_positions=tf.keras.Input(
          shape=(num_token_predictions,), dtype=tf.int32))

  # Invoke the trainer model on the inputs. This causes the layer to be built.
  outputs = bert_trainer_model(inputs)
  has_encoder_outputs = dict_outputs or return_all_encoder_outputs
  if has_encoder_outputs:
    self.assertSameElements(
        outputs.keys(),
        ['sequence_output', 'pooled_output', 'mlm_logits', 'encoder_outputs'])
    self.assertLen(outputs['encoder_outputs'], num_layers)
  else:
    self.assertSameElements(
        outputs.keys(), ['sequence_output', 'pooled_output', 'mlm_logits'])

  # Validate that the outputs are of the expected shape.
  expected_lm_shape = [None, num_token_predictions, vocab_size]
  self.assertAllEqual(expected_lm_shape,
                      outputs['mlm_logits'].shape.as_list())
  expected_sequence_output_shape = [None, sequence_length, hidden_size]
  self.assertAllEqual(expected_sequence_output_shape,
                      outputs['sequence_output'].shape.as_list())
  expected_pooled_output_shape = [None, hidden_size]
  self.assertAllEqual(expected_pooled_output_shape,
                      outputs['pooled_output'].shape.as_list())
def __init__(self,
             generator_network,
             discriminator_network,
             vocab_size,
             num_classes,
             sequence_length,
             num_token_predictions,
             mlm_activation=None,
             mlm_initializer='glorot_uniform',
             output_type='logits',
             disallow_correct=False,
             **kwargs):
  super(ElectraPretrainer, self).__init__()
  self._config = {
      'generator_network': generator_network,
      'discriminator_network': discriminator_network,
      'vocab_size': vocab_size,
      'num_classes': num_classes,
      'sequence_length': sequence_length,
      'num_token_predictions': num_token_predictions,
      'mlm_activation': mlm_activation,
      'mlm_initializer': mlm_initializer,
      'output_type': output_type,
      'disallow_correct': disallow_correct,
  }
  for k, v in kwargs.items():
    self._config[k] = v

  self.generator_network = generator_network
  self.discriminator_network = discriminator_network
  self.vocab_size = vocab_size
  self.num_classes = num_classes
  self.sequence_length = sequence_length
  self.num_token_predictions = num_token_predictions
  self.mlm_activation = mlm_activation
  self.mlm_initializer = mlm_initializer
  self.output_type = output_type
  self.disallow_correct = disallow_correct
  self.masked_lm = layers.MaskedLM(
      embedding_table=generator_network.get_embedding_table(),
      activation=mlm_activation,
      initializer=mlm_initializer,
      output=output_type,
      name='generator_masked_lm')
  self.classification = layers.ClassificationHead(
      inner_dim=generator_network._config_dict['hidden_size'],
      num_classes=num_classes,
      initializer=mlm_initializer,
      name='generator_classification_head')
  self.discriminator_projection = tf.keras.layers.Dense(
      units=discriminator_network._config_dict['hidden_size'],
      activation=mlm_activation,
      kernel_initializer=mlm_initializer,
      name='discriminator_projection_head')
  self.discriminator_head = tf.keras.layers.Dense(
      units=1, kernel_initializer=mlm_initializer)
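# A construction sketch for ElectraPretrainer (an addition, not from the
# source). Both towers are assumed to be small BertEncoders; note that the
# constructor reads `_config_dict['hidden_size']` from each network, so any
# encoder passed in must expose that private attribute.
generator = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=2)
discriminator = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=2)
electra = ElectraPretrainer(
    generator_network=generator,
    discriminator_network=discriminator,
    vocab_size=100,
    num_classes=2,
    sequence_length=16,
    num_token_predictions=4)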
def __init__(self,
             generator_network,
             discriminator_mws_network,
             num_discriminator_task_agnostic_layers,
             vocab_size,
             candidate_size=5,
             mlm_activation=None,
             mlm_initializer='glorot_uniform',
             output_type='logits',
             **kwargs):
  super().__init__()
  self._config = {
      'generator_network': generator_network,
      'discriminator_mws_network': discriminator_mws_network,
      'num_discriminator_task_agnostic_layers':
          num_discriminator_task_agnostic_layers,
      'vocab_size': vocab_size,
      'candidate_size': candidate_size,
      'mlm_activation': mlm_activation,
      'mlm_initializer': mlm_initializer,
      'output_type': output_type,
  }
  for k, v in kwargs.items():
    self._config[k] = v

  self.generator_network = generator_network
  self.discriminator_mws_network = discriminator_mws_network
  self.vocab_size = vocab_size
  self.candidate_size = candidate_size
  self.mlm_activation = mlm_activation
  self.mlm_initializer = mlm_initializer
  self.output_type = output_type
  embedding_table = generator_network.embedding_network.get_embedding_table()
  self.masked_lm = layers.MaskedLM(
      embedding_table=embedding_table,
      activation=mlm_activation,
      initializer=mlm_initializer,
      output=output_type,
      name='generator_masked_lm')
  discriminator_cfg = self.discriminator_mws_network.get_config()
  self.discriminator_rtd_head = ReplacedTokenDetectionHead(
      encoder_cfg=discriminator_cfg,
      num_task_agnostic_layers=num_discriminator_task_agnostic_layers,
      output=output_type,
      name='discriminator_rtd')
  hidden_cfg = discriminator_cfg['hidden_cfg']
  self.discriminator_mws_head = MultiWordSelectionHead(
      embedding_table=embedding_table,
      activation=hidden_cfg['intermediate_activation'],
      initializer=hidden_cfg['kernel_initializer'],
      output=output_type,
      name='discriminator_mws')
  self.num_task_agnostic_layers = num_discriminator_task_agnostic_layers
def __init__(
    self,
    num_masked_tokens: int,
    encoder_network: tf.keras.Model,
    mlm_activation=None,
    mlm_initializer='glorot_uniform',
    classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
    name: str = 'bert',
    **kwargs):
  self._self_setattr_tracking = False
  self._config = {
      'encoder_network': encoder_network,
      'num_masked_tokens': num_masked_tokens,
      'mlm_initializer': mlm_initializer,
      'classification_heads': classification_heads,
      'name': name,
  }

  self.encoder_network = encoder_network
  inputs = copy.copy(self.encoder_network.inputs)
  sequence_output, _ = self.encoder_network(inputs)

  self.classification_heads = classification_heads or []
  if len(set([cls.name for cls in self.classification_heads])) != len(
      self.classification_heads):
    raise ValueError('Classification heads should have unique names.')

  outputs = dict()
  if num_masked_tokens > 0:
    self.masked_lm = layers.MaskedLM(
        embedding_table=self.encoder_network.get_embedding_table(),
        activation=mlm_activation,
        initializer=mlm_initializer,
        name='cls/predictions')
    masked_lm_positions = tf.keras.layers.Input(
        shape=(num_masked_tokens,),
        name='masked_lm_positions',
        dtype=tf.int32)
    inputs.append(masked_lm_positions)
    outputs['lm_output'] = self.masked_lm(
        sequence_output, masked_positions=masked_lm_positions)

  for cls_head in self.classification_heads:
    outputs[cls_head.name] = cls_head(sequence_output)

  super(BertPretrainerV2, self).__init__(
      inputs=inputs, outputs=outputs, name=name, **kwargs)
def __init__(self,
             network,
             num_classes,
             num_token_predictions,
             embedding_table=None,
             activation=None,
             initializer='glorot_uniform',
             output='logits',
             **kwargs):
  # We want to use the inputs of the passed network as the inputs to this
  # Model. To do this, we need to keep a copy of the network inputs for use
  # when we construct the Model object at the end of init. (We keep a copy
  # because we'll be adding another tensor to the copy later.)
  network_inputs = network.inputs
  inputs = copy.copy(network_inputs)

  # Because we have a copy of inputs to create this Model object, we can
  # invoke the Network object with its own input tensors to start the Model.
  # Note that, because of how deferred construction happens, we can't use
  # the copy of the list here - by the time the network is invoked, the list
  # object contains the additional input added below.
  sequence_output, cls_output = network(network_inputs)

  # The encoder network may get outputs from all layers.
  if isinstance(sequence_output, list):
    sequence_output = sequence_output[-1]
  if isinstance(cls_output, list):
    cls_output = cls_output[-1]
  sequence_output_length = sequence_output.shape.as_list()[1]
  if sequence_output_length is not None and (sequence_output_length <
                                             num_token_predictions):
    raise ValueError(
        "The passed network's output length is %s, which is less than the "
        'requested num_token_predictions %s.' %
        (sequence_output_length, num_token_predictions))

  masked_lm_positions = tf.keras.layers.Input(
      shape=(num_token_predictions,),
      name='masked_lm_positions',
      dtype=tf.int32)
  inputs.append(masked_lm_positions)

  if embedding_table is None:
    embedding_table = network.get_embedding_table()
  masked_lm = layers.MaskedLM(
      embedding_table=embedding_table,
      activation=activation,
      initializer=initializer,
      output=output,
      name='cls/predictions')
  lm_outputs = masked_lm(
      sequence_output, masked_positions=masked_lm_positions)

  classification = networks.Classification(
      input_width=cls_output.shape[-1],
      num_classes=num_classes,
      initializer=initializer,
      output=output,
      name='classification')
  sentence_outputs = classification(cls_output)

  super(BertPretrainer, self).__init__(
      inputs=inputs,
      outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs),
      **kwargs)

  # b/164516224
  # Once we've created the network using the Functional API, we call
  # super().__init__ as though we were invoking the Functional API Model
  # constructor, resulting in this object having all the properties of a
  # model created using the Functional API. Once super().__init__ is called,
  # we can assign attributes to `self` - note that all `self` assignments
  # are below this line.
  config_dict = {
      'network': network,
      'num_classes': num_classes,
      'num_token_predictions': num_token_predictions,
      'activation': activation,
      'initializer': initializer,
      'output': output,
  }
  # We are storing the config dict as a namedtuple here to ensure checkpoint
  # compatibility with an earlier version of this model which did not track
  # the config dict attribute. TF does not track immutable attrs which
  # do not contain Trackables, so by creating a config namedtuple instead of
  # a dict we avoid tracking it.
  config_cls = collections.namedtuple('Config', config_dict.keys())
  self._config = config_cls(**config_dict)
  self.encoder = network
  self.classification = classification
  self.masked_lm = masked_lm
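# A hedged usage sketch (an addition, not from the source): the original-style
# BertPretrainer takes a single encoder plus prediction counts, and calling it
# returns a dict with 'masked_lm' and 'classification' outputs. Sizes below
# are arbitrary assumptions.
test_network = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=2)
pretrainer = BertPretrainer(
    network=test_network, num_classes=2, num_token_predictions=8)
# pretrainer.inputs now holds the encoder inputs plus masked_lm_positions.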
def __init__(self,
             network,
             num_classes,
             num_token_predictions,
             embedding_table=None,
             activation=None,
             initializer='glorot_uniform',
             output='logits',
             **kwargs):
  self._self_setattr_tracking = False
  self._config = {
      'network': network,
      'num_classes': num_classes,
      'num_token_predictions': num_token_predictions,
      'activation': activation,
      'initializer': initializer,
      'output': output,
  }
  self.encoder = network

  # We want to use the inputs of the passed network as the inputs to this
  # Model. To do this, we need to keep a copy of the network inputs for use
  # when we construct the Model object at the end of init. (We keep a copy
  # because we'll be adding another tensor to the copy later.)
  network_inputs = self.encoder.inputs
  inputs = copy.copy(network_inputs)

  # Because we have a copy of inputs to create this Model object, we can
  # invoke the Network object with its own input tensors to start the Model.
  # Note that, because of how deferred construction happens, we can't use
  # the copy of the list here - by the time the network is invoked, the list
  # object contains the additional input added below.
  sequence_output, cls_output = self.encoder(network_inputs)

  # The encoder network may get outputs from all layers.
  if isinstance(sequence_output, list):
    sequence_output = sequence_output[-1]
  if isinstance(cls_output, list):
    cls_output = cls_output[-1]
  sequence_output_length = sequence_output.shape.as_list()[1]
  if sequence_output_length is not None and (sequence_output_length <
                                             num_token_predictions):
    raise ValueError(
        "The passed network's output length is %s, which is less than the "
        'requested num_token_predictions %s.' %
        (sequence_output_length, num_token_predictions))

  masked_lm_positions = tf.keras.layers.Input(
      shape=(num_token_predictions,),
      name='masked_lm_positions',
      dtype=tf.int32)
  inputs.append(masked_lm_positions)

  if embedding_table is None:
    embedding_table = self.encoder.get_embedding_table()
  self.masked_lm = layers.MaskedLM(
      embedding_table=embedding_table,
      activation=activation,
      initializer=initializer,
      output=output,
      name='cls/predictions')
  lm_outputs = self.masked_lm(
      sequence_output, masked_positions=masked_lm_positions)
  self.classification = networks.Classification(
      input_width=cls_output.shape[-1],
      num_classes=num_classes,
      initializer=initializer,
      output=output,
      name='classification')
  sentence_outputs = self.classification(cls_output)

  super(BertPretrainer, self).__init__(
      inputs=inputs,
      outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs),
      **kwargs)
def __init__(
    self,
    encoder_network: tf.keras.Model,
    mlm_activation=None,
    mlm_initializer='glorot_uniform',
    classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
    customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
    name: str = 'bert',
    **kwargs):
  inputs = copy.copy(encoder_network.inputs)
  outputs = {}
  encoder_network_outputs = encoder_network(inputs)
  if isinstance(encoder_network_outputs, list):
    outputs['pooled_output'] = encoder_network_outputs[1]
    if isinstance(encoder_network_outputs[0], list):
      outputs['encoder_outputs'] = encoder_network_outputs[0]
      outputs['sequence_output'] = encoder_network_outputs[0][-1]
    else:
      outputs['sequence_output'] = encoder_network_outputs[0]
  elif isinstance(encoder_network_outputs, dict):
    outputs = encoder_network_outputs
  else:
    raise ValueError('encoder_network\'s output should be either a list '
                     'or a dict, but got %s' % encoder_network_outputs)

  masked_lm_positions = tf.keras.layers.Input(
      shape=(None,), name='masked_lm_positions', dtype=tf.int32)
  inputs.append(masked_lm_positions)
  masked_lm_layer = customized_masked_lm or layers.MaskedLM(
      embedding_table=encoder_network.get_embedding_table(),
      activation=mlm_activation,
      initializer=mlm_initializer,
      name='cls/predictions')
  sequence_output = outputs['sequence_output']
  outputs['mlm_logits'] = masked_lm_layer(
      sequence_output, masked_positions=masked_lm_positions)

  classification_head_layers = classification_heads or []
  for cls_head in classification_head_layers:
    cls_outputs = cls_head(sequence_output)
    if isinstance(cls_outputs, dict):
      outputs.update(cls_outputs)
    else:
      outputs[cls_head.name] = cls_outputs

  super(MobileBERTEdgeTPUPretrainer, self).__init__(
      inputs=inputs, outputs=outputs, name=name, **kwargs)

  self._config = {
      'encoder_network': encoder_network,
      'mlm_activation': mlm_activation,
      'mlm_initializer': mlm_initializer,
      'classification_heads': classification_heads,
      'customized_masked_lm': customized_masked_lm,
      'name': name,
  }
  self.encoder_network = encoder_network
  self.masked_lm = masked_lm_layer
  self.classification_heads = classification_head_layers
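# A sketch of attaching a classification head (an addition, not from the
# source): any encoder exposing `.inputs`, `get_embedding_table()`, and
# list/dict outputs should work; a small BertEncoder with dict outputs is
# assumed here for brevity.
encoder = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=2,
    dict_outputs=True)
cls_heads = [layers.ClassificationHead(
    inner_dim=32, num_classes=2, name='next_sentence')]
pretrainer = MobileBERTEdgeTPUPretrainer(
    encoder_network=encoder, classification_heads=cls_heads)
# The model's output dict then contains 'next_sentence' next to 'mlm_logits'.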
self.generator_network = generator_network
self.discriminator_network = discriminator_network
self.vocab_size = vocab_size
self.num_classes = num_classes
self.sequence_length = sequence_length
self.num_token_predictions = num_token_predictions
self.mlm_activation = mlm_activation
self.mlm_initializer = mlm_initializer
self.output_type = output_type
self.disallow_correct = disallow_correct
self.masked_lm = layers.MaskedLM(
    embedding_table=generator_network.get_embedding_table(),
    activation=mlm_activation,
    initializer=mlm_initializer,
    output=output_type,
    name='generator_masked_lm')
self.classification = layers.ClassificationHead(
    inner_dim=generator_network._config_dict['hidden_size'],
    num_classes=num_classes,
    initializer=mlm_initializer,
    name='generator_classification_head')
self.discriminator_projection = tf.keras.layers.Dense(
    units=discriminator_network._config_dict['hidden_size'],
    activation=mlm_activation,
    kernel_initializer=mlm_initializer,
    name='discriminator_projection_head')
self.discriminator_head = tf.keras.layers.Dense(
    units=1, kernel_initializer=mlm_initializer)
raise ValueError(
    "The passed network's output length is %s, which is less than the "
    'requested num_token_predictions %s.' %
    (sequence_output_length, num_token_predictions))

masked_lm_positions = tf.keras.layers.Input(
    shape=(num_token_predictions,),
    name='masked_lm_positions',
    dtype=tf.int32)
inputs.append(masked_lm_positions)

if embedding_table is None:
  embedding_table = self.encoder.get_embedding_table()
self.masked_lm = layers.MaskedLM(
    embedding_table=embedding_table,
    activation=activation,
    initializer=initializer,
    output=output,
    name='cls/predictions')
lm_outputs = self.masked_lm(
    sequence_output, masked_positions=masked_lm_positions)
self.classification = networks.Classification(
    input_width=cls_output.shape[-1],
    num_classes=num_classes,
    initializer=initializer,
    output=output,
    name='classification')
sentence_outputs = self.classification(cls_output)

super(BertPretrainer, self).__init__(
    inputs=inputs,
    outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs),
    **kwargs)