Пример #1
0
    def prepare_config_and_inputs(self):
        """Build a ConvBertConfig together with the tensors produced by ``ids_tensor``.

        Returns:
            A 7-tuple ``(config, input_ids, token_type_ids, input_mask,
            sequence_labels, token_labels, choice_labels)``. ``input_mask``,
            ``token_type_ids`` and the three label entries are ``None`` when
            the matching ``use_*`` flag on ``self`` is falsy.
        """
        # The ids_tensor calls stay in their original order (ids, mask, token
        # types, labels) in case the helper draws from a shared random source.
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = (
            ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
            if self.use_input_mask
            else None
        )

        token_type_ids = (
            ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
            if self.use_token_type_ids
            else None
        )

        sequence_labels = token_labels = choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        # Collect the configuration arguments first, then build the config.
        config_kwargs = {
            "vocab_size": self.vocab_size,
            "hidden_size": self.hidden_size,
            "num_hidden_layers": self.num_hidden_layers,
            "num_attention_heads": self.num_attention_heads,
            "intermediate_size": self.intermediate_size,
            "hidden_act": self.hidden_act,
            "hidden_dropout_prob": self.hidden_dropout_prob,
            "attention_probs_dropout_prob": self.attention_probs_dropout_prob,
            "max_position_embeddings": self.max_position_embeddings,
            "type_vocab_size": self.type_vocab_size,
            "initializer_range": self.initializer_range,
            "return_dict": True,
        }
        config = ConvBertConfig(**config_kwargs)

        return (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        )
Пример #2
0
 def load_config(self):
     """Load the ``sarnikowski/convbert-medium-small-da-cased`` config for NER.

     The label maps from ``self.params`` and the position limit are passed
     to ``from_pretrained`` as overrides. ``from_pretrained`` is a
     classmethod, so calling it on an already-constructed
     ``ConvBertConfig(...)`` instance throws that instance (and every
     argument given to it) away; the overrides must go into the
     ``from_pretrained`` call itself to reach the returned config.

     Returns:
         The loaded ``ConvBertConfig`` with ``label2id``, ``id2label``,
         ``max_position_embeddings`` and ``finetuning_task`` applied.
     """
     # NOTE(review): max_position_embeddings=1580 now actually takes effect.
     # Confirm the model-loading code tolerates a position-embedding table
     # larger than the one stored in the checkpoint.
     return ConvBertConfig.from_pretrained(
         "sarnikowski/convbert-medium-small-da-cased",
         finetuning_task="ner",
         label2id=self.params.tag2id,
         id2label=self.params.id2tag,
         max_position_embeddings=1580,
     )
def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path,
                                           convbert_config_file,
                                           pytorch_dump_path):
    """Convert a TF1 ConvBERT checkpoint and save it via ``save_pretrained``.

    Args:
        tf_checkpoint_path: Checkpoint location handed to
            ``load_tf_weights_in_convbert``.
        convbert_config_file: JSON file read by
            ``ConvBertConfig.from_json_file``.
        pytorch_dump_path: Destination passed to ``save_pretrained``.
    """
    config = ConvBertConfig.from_json_file(convbert_config_file)

    # Create the model from the config, then let the loader fill in the
    # weights from the TensorFlow checkpoint.
    converted = load_tf_weights_in_convbert(
        ConvBertModel(config), config, tf_checkpoint_path
    )
    converted.save_pretrained(pytorch_dump_path)
Пример #4
0
 def load_context(self, context):
     """Load the tokenizer and model from the artifacts listed in ``context``.

     Reads ``context.artifacts["tokenizer_dir"]`` and
     ``context.artifacts["model_dir"]``, stores the results on
     ``self.tokenizer`` and ``self.model``, and switches the model to
     evaluation mode.
     """
     tokenizer_dir = context.artifacts["tokenizer_dir"]
     tokenizer_config = ConvBertConfig.from_pretrained(
         os.path.join(tokenizer_dir, "tokenizer_config.json"))
     self.tokenizer = ConvBertTokenizerFast.from_pretrained(
         tokenizer_dir, config=tokenizer_config)

     model_dir = context.artifacts["model_dir"]
     self.model = ConvBertForSequenceClassification.from_pretrained(
         model_dir, return_dict=True)
     # Evaluation mode for inference.
     self.model.eval()
 def get_config(self):
     """Return a ``ConvBertConfig`` built from this object's attributes.

     ``is_decoder`` is always passed as ``False``; every other argument is
     read from the attribute of the same name on ``self``.
     """
     settings = {
         "vocab_size": self.vocab_size,
         "hidden_size": self.hidden_size,
         "num_hidden_layers": self.num_hidden_layers,
         "num_attention_heads": self.num_attention_heads,
         "intermediate_size": self.intermediate_size,
         "hidden_act": self.hidden_act,
         "hidden_dropout_prob": self.hidden_dropout_prob,
         "attention_probs_dropout_prob": self.attention_probs_dropout_prob,
         "max_position_embeddings": self.max_position_embeddings,
         "type_vocab_size": self.type_vocab_size,
         "is_decoder": False,
         "initializer_range": self.initializer_range,
     }
     return ConvBertConfig(**settings)