Python RoFormerForMaskedLM примеры использования

Язык программирования: Python

Пространство имен/Пакет: transformers

Класс/Тип: RoFormerForMaskedLM

Примеров на hotexamples.com: 4

Python RoFormerForMaskedLM — найдено 4 примера. Это лучшие примеры Python-кода для transformers.RoFormerForMaskedLM, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

RoFormerForMaskedLM(2)

from_pretrained(2)

eval(1)

state_dict(1)

to(1)

Пример #1

Показать файл

Файл: convert_roformer_original_tf_checkpoint_to_pytorch.py Проект: yulinggu-cs/transformers

def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    """Convert a TensorFlow RoFormer checkpoint into a saved PyTorch state dict.

    Args:
        tf_checkpoint_path: Checkpoint location handed to
            ``load_tf_weights_in_roformer``.
        bert_config_file: JSON file read by ``RoFormerConfig.from_json_file``.
        pytorch_dump_path: Destination passed to ``torch.save``.
    """
    # The model is instantiated from the JSON configuration alone; its
    # weights are filled in from the TensorFlow checkpoint right after.
    roformer_config = RoFormerConfig.from_json_file(bert_config_file)
    print(f"Building PyTorch model from configuration: {roformer_config}")
    masked_lm = RoFormerForMaskedLM(roformer_config)

    load_tf_weights_in_roformer(masked_lm, roformer_config, tf_checkpoint_path)

    print(f"Save PyTorch model to {pytorch_dump_path}")
    weights = masked_lm.state_dict()
    # The new zipfile-based serialization is explicitly switched off.
    torch.save(weights, pytorch_dump_path, _use_new_zipfile_serialization=False)

Пример #2

Показать файл

Файл: test_modeling_roformer.py Проект: huggingface/transformers

 def create_and_check_for_masked_lm(self, config, input_ids, token_type_ids,
                                    input_mask, sequence_labels,
                                    token_labels, choice_labels):
     """Run RoFormerForMaskedLM once and check the shape of its logits.

     ``sequence_labels`` and ``choice_labels`` are accepted but not used
     by this check.
     """
     masked_lm = RoFormerForMaskedLM(config=config)
     masked_lm.to(torch_device)
     masked_lm.eval()

     outputs = masked_lm(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         labels=token_labels,
     )

     # Logits are expected to be (batch_size, seq_length, vocab_size).
     logits_shape = outputs.logits.shape
     expected_shape = (self.batch_size, self.seq_length, self.vocab_size)
     self.parent.assertEqual(logits_shape, expected_shape)

Пример #3

Показать файл

Файл: test_modeling_roformer.py Проект: huggingface/transformers

    def test_inference_masked_lm(self):
        """Check the masked-LM logits of the pretrained Chinese base checkpoint.

        Loads ``junnyu/roformer_chinese_base``, runs a fixed six-token input
        and verifies both the output shape and the top-left 3x3 slice of the
        logits against stored reference values (tolerance ``1e-4``).
        """
        model = RoFormerForMaskedLM.from_pretrained(
            "junnyu/roformer_chinese_base")
        input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])

        # Pure inference: nothing is backpropagated, so skip autograd
        # bookkeeping for the forward pass.
        with torch.no_grad():
            output = model(input_ids)[0]

        # TODO Replace vocab size
        vocab_size = 50000

        expected_shape = torch.Size((1, 6, vocab_size))
        self.assertEqual(output.shape, expected_shape)

        # TODO Replace values below with what was printed above.
        expected_slice = torch.tensor([[[-0.1205, -1.0265, 0.2922],
                                        [-1.5134, 0.1974, 0.1519],
                                        [-5.0135, -3.9003, -0.8404]]])

        self.assertTrue(
            torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))

Пример #4

Показать файл

Файл: test_mlm_v1.py Проект: JunnYu/RoFormer_pytorch

import torch
import tensorflow as tf
from transformers import RoFormerForMaskedLM, RoFormerTokenizer, TFRoFormerForMaskedLM

# One sentence with two [MASK] positions; the same checkpoint name is used for
# the tokenizer, the PyTorch model and the TensorFlow model.
checkpoint = "junnyu/roformer_chinese_base"
text = "今天[MASK]很好，我[MASK]去公园玩。"
tokenizer = RoFormerTokenizer.from_pretrained(checkpoint)
pt_model = RoFormerForMaskedLM.from_pretrained(checkpoint)
tf_model = TFRoFormerForMaskedLM.from_pretrained(checkpoint, from_pt=True)
pt_inputs = tokenizer(text, return_tensors="pt")
tf_inputs = tokenizer(text, return_tensors="tf")

# pytorch
with torch.no_grad():
    pt_outputs = pt_model(**pt_inputs).logits[0]

# Rebuild the sentence piece by piece: masked positions become the five
# highest-scoring candidates as "[a||b||c||d||e]", every other id is decoded
# back to its token with special tokens skipped.
pt_pieces = ["pytorch: "]
for position, token_id in enumerate(tokenizer.encode(text)):
    if token_id != tokenizer.mask_token_id:
        pt_pieces.append("".join(
            tokenizer.convert_ids_to_tokens([token_id],
                                            skip_special_tokens=True)))
        continue
    tokens = tokenizer.convert_ids_to_tokens(
        pt_outputs[position].topk(k=5)[1])
    pt_pieces.append("[" + "||".join(tokens) + "]")
pt_outputs_sentence = "".join(pt_pieces)
print(pt_outputs_sentence)
# tf
# Run the TensorFlow model with training=False and keep element 0 of the
# logits (presumably the single sequence in the batch — verify).
tf_outputs = tf_model(**tf_inputs, training=False).logits[0]
tf_outputs_sentence = "tf: "
# Walk over the token ids of `text`; `i` selects the matching row of
# tf_outputs for each position.
for i, id in enumerate(tokenizer.encode(text)):
    if id == tokenizer.mask_token_id:
        # Masked position: convert the indices of the 5 highest-scoring
        # entries to tokens and append them as "[a||b||c||d||e]".
        tokens = tokenizer.convert_ids_to_tokens(
            tf.math.top_k(tf_outputs[i], k=5)[1])
        tf_outputs_sentence += "[" + "||".join(tokens) + "]"