def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = MobileBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, hidden_act=self.hidden_act, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, initializer_range=self.initializer_range, embedding_size=self.embedding_size, ) return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, mobilebert_config_file, pytorch_dump_path): # Initialise PyTorch model config = MobileBertConfig.from_json_file(mobilebert_config_file) print(f"Building PyTorch model from configuration: {config}") model = MobileBertForPreTraining(config) # Load weights from tf checkpoint model = load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path) # Save pytorch-model print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path)
def get_config(self): return MobileBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, embedding_size=self.embedding_size, hidden_act=self.hidden_act, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, is_decoder=False, initializer_range=self.initializer_range, )
# do something with encoder_outputs # do something # do something # do something # endregion sequence_output = encoder_outputs[0] pooled_output = self.pooler(sequence_output) if self.pooler is not None else None if not return_dict: return (sequence_output, pooled_output) + encoder_outputs[1:] return BaseModelOutputWithPooling( last_hidden_state=sequence_output, pooler_output=pooled_output, hidden_states=encoder_outputs.hidden_states, attentions=encoder_outputs.attentions, ) def cos_sim(x, y): return torch.sum(x * y, dim=-1) / (torch.norm(x, dim=-1) * torch.norm(y, dim=-1)) if __name__ == '__main__': x = (torch.rand(4, 256) * 100).long() tc_model = AutoModel.from_pretrained('bert-base-cased') st_model = StudentModel(MobileBertConfig()) ib_model = IB(768, 128, 256) with torch.no_grad(): tc_result = tc_model(x, return_dict=False)[1] # (bs, 768) target = ib_model(tc_result) # (bs, 128) st_result = st_model(x)[1] # (bs, 128)
def init_encoder(cls, cfg_name: str) -> MobileBertModel: cfg = MobileBertConfig.from_pretrained(cfg_name) return cls.from_pretrained(cfg_name, config=cfg)