Example No. 1
def from_torch(model: TorchRobertaModel,
               device: Optional[torch.device] = None):
    if (device is not None and 'cuda' in device.type
            and torch.cuda.is_available()):
        model.to(device)
    encoder = BertEncoder.from_torch(model.encoder)
    pooler = BertPooler.from_torch(model.pooler)
    return RobertaModel(model.embeddings, encoder, pooler, model.config)
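
For context, a minimal conversion sketch following the same pattern the tests further down this page use: a freshly initialized Hugging Face RobertaModel handed to turbo_transformers.RobertaModel.from_torch.

import torch
import turbo_transformers
from transformers import RobertaConfig, RobertaModel

# Build a randomly initialized torch RoBERTa and convert it for CPU inference,
# mirroring the test code below.
cfg = RobertaConfig()
torch_model = RobertaModel(cfg)
torch_model.eval()
turbo_model = turbo_transformers.RobertaModel.from_torch(
    torch_model, torch.device('cpu:0'))
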
class TestRobertaModel(unittest.TestCase):
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = RobertaConfig()
        self.torch_model = RobertaModel(self.cfg)
        self.torch_model.eval()

        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.RobertaModel.from_torch(
            self.torch_model, self.test_device)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda)
        num_iter = 20
        device_name = "GPU" if use_cuda else "CPU"
        input_ids = torch.randint(low=0,
                                  high=self.cfg.vocab_size - 1,
                                  size=(1, 10),
                                  dtype=torch.long,
                                  device=self.test_device)

        torch_model = lambda: self.torch_model(input_ids)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'RobertaModel PyTorch({device_name}) QPS {torch_qps}')

        turbo_model = (lambda: self.turbo_model(input_ids))
        with turbo_transformers.pref_guard("roberta_perf") as perf:
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(f'RobertaModel TurboTransformer({device_name}) QPS {turbo_qps}')

        torch_result_final = torch_result[0].cpu().numpy()

        turbo_result_final = turbo_result[0].cpu().numpy()
        # print(numpy.size(torch_result_final), numpy.size(turbo_result_final))
        # print(torch_result_final - turbo_result_final)
        self.assertTrue(
            numpy.allclose(torch_result_final,
                           turbo_result_final,
                           atol=1e-3,
                           rtol=1e-3))

    def test_Roberta_model(self):
        if torch.cuda.is_available() and \
            turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
        self.check_torch_and_turbo(use_cuda=False)
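
The benchmark loop is delegated to test_helper.run_model. A rough sketch of what such a helper is assumed to do here, namely time num_iter forward passes and report throughput; the actual helper shipped with turbo_transformers may differ in detail.

import time
import torch

def run_model_sketch(model_fn, use_cuda, num_iter):
    # Run the callable num_iter times and return
    # (last_result, queries_per_second, elapsed_seconds).
    if use_cuda:
        torch.cuda.synchronize()
    start = time.time()
    result = None
    for _ in range(num_iter):
        result = model_fn()
    if use_cuda:
        torch.cuda.synchronize()
    elapsed = time.time() - start
    return result, num_iter / elapsed, elapsed
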
    def __init__(self,
                 config,
                 num_labels_list,
                 layer=-1,
                 freeze=False,
                 tokens=False,
                 tagger=False,
                 relations=False,
                 num_attention_heads=12,
                 class_weights=None):
        super().__init__(config)
        self.num_labels = num_labels_list

        self.roberta = RobertaModel(config)

        if freeze:
            for param in self.roberta.parameters():
                param.requires_grad = False

        self.feature_extractors = nn.ModuleList()
        self.classifiers = nn.ModuleList()

        for task_ind, task_num_labels in enumerate(num_labels_list):
            self.feature_extractors.append(
                RepresentationProjectionLayer(
                    config,
                    layer=layer,
                    tokens=tokens,
                    tagger=tagger[task_ind],
                    relations=relations[task_ind],
                    num_attention_heads=num_attention_heads))
            if relations[task_ind]:
                self.classifiers.append(
                    ClassificationHead(config,
                                       task_num_labels,
                                       hidden_size=num_attention_heads))
            else:
                self.classifiers.append(
                    ClassificationHead(config, task_num_labels))

        # Are we operating as a sequence classifier (1 label per input sequence) or a tagger (1 label per input token in the sequence)?
        self.tagger = tagger
        self.relations = relations

        if class_weights is None:
            self.class_weights = [None] * len(self.classifiers)
        else:
            self.class_weights = class_weights

        self.init_weights()
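
Note that this constructor indexes tagger[task_ind] and relations[task_ind], so despite the scalar defaults both are expected to be per-task sequences. A hypothetical instantiation, assuming the class (shown in full as CnlpRobertaForClassification further down this page) and its helper layers RepresentationProjectionLayer and ClassificationHead are defined in scope:

from transformers import RobertaConfig

# Two tasks: a binary classifier and a 3-way classifier, neither of which is
# a tagger or a relation task. All names besides RobertaConfig are assumed to
# come from the surrounding code base.
config = RobertaConfig()
model = CnlpRobertaForClassification(
    config,
    num_labels_list=[2, 3],
    tagger=[False, False],
    relations=[False, False],
)
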
Example No. 4
    def __init__(self, config, weight=None):
        super(RobertaForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)
        self.weight = weight
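
The stored weight is typically applied as per-class weights in the classification loss. A generic sketch of that technique (an assumption about how this particular model uses it, since its forward is not shown):

import torch
import torch.nn as nn

# Per-class weights make under-represented classes count more in the loss.
logits = torch.randn(4, 3)              # (batch, num_labels)
labels = torch.tensor([0, 2, 1, 2])
class_weights = torch.tensor([1.0, 2.0, 0.5])
loss = nn.CrossEntropyLoss(weight=class_weights)(logits, labels)
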
    def test_from_pytorch(self):
        with torch.no_grad():
            with self.subTest("roberta-base"):
                tokenizer = RobertaTokenizerFast.from_pretrained(
                    "roberta-base")
                fx_model = FlaxRobertaModel.from_pretrained("roberta-base")
                pt_model = RobertaModel.from_pretrained("roberta-base")

                # Check for simple input
                pt_inputs = tokenizer.encode_plus(
                    "This is a simple input",
                    return_tensors=TensorType.PYTORCH)
                fx_inputs = tokenizer.encode_plus(
                    "This is a simple input", return_tensors=TensorType.JAX)
                pt_outputs = pt_model(**pt_inputs)
                fx_outputs = fx_model(**fx_inputs)

                self.assertEqual(
                    len(fx_outputs), len(pt_outputs),
                    "Output lengths differ between Flax and PyTorch")

                for fx_output, pt_output in zip(fx_outputs,
                                                pt_outputs.to_tuple()):
                    self.assert_almost_equals(fx_output, pt_output.numpy(),
                                              5e-3)
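
assert_almost_equals comes from the surrounding test mixin. A minimal sketch of what such a check might look like, comparing the largest element-wise difference against the tolerance; the real helper in the transformers test suite may differ.

import numpy as np

def assert_almost_equals_sketch(a, b, tol):
    # Fail if any element of the two outputs differs by more than tol.
    diff = np.abs(np.asarray(a) - np.asarray(b)).max()
    assert diff <= tol, f"max difference {diff} exceeds tolerance {tol}"
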
Example No. 6
    def __init__(self, tagset_size):
        super(RobertaForSequenceClassification, self).__init__()
        self.tagset_size = tagset_size

        self.roberta_single = RobertaModel.from_pretrained(pretrain_model_dir)
        self.single_hidden2tag = RobertaClassificationHead(
            bert_hidden_dim, tagset_size)
Example No. 7
    def __init__(self, config, finetune, list_labels=[], use_bilstms=False):
        #config2 = config
        #config2.num_labels = 2
        super(MTLRobertaForTokenClassification, self).__init__(config)
        self.num_labels = list_labels
        self.num_tasks = len(self.num_labels)
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.use_bilstms = use_bilstms
        self.lstm_size = 400
        self.lstm_layers = 2
        self.bidirectional_lstm = True

        if self.use_bilstms:
            self.lstm = nn.LSTM(config.hidden_size,
                                self.lstm_size,
                                num_layers=self.lstm_layers,
                                batch_first=True,
                                bidirectional=self.bidirectional_lstm)

            self.hidden2tagList = nn.ModuleList([
                nn.Linear(
                    self.lstm_size * (2 if self.bidirectional_lstm else 1),
                    self.num_labels[idtask])
                for idtask in range(self.num_tasks)
            ])
        else:

            self.hidden2tagList = nn.ModuleList([
                nn.Linear(config.hidden_size, self.num_labels[idtask])
                for idtask in range(self.num_tasks)
            ])

        self.finetune = finetune
        self.init_weights()
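
A hypothetical instantiation of the multi-task tagger above, assuming MTLRobertaForTokenClassification is defined or importable; list_labels carries one label count per task.

from transformers import RobertaConfig

# Two token-level tasks with 5 and 3 labels, using the BiLSTM heads.
config = RobertaConfig()
model = MTLRobertaForTokenClassification(
    config,
    finetune=True,
    list_labels=[5, 3],
    use_bilstms=True,
)
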
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = RobertaConfig()
        self.torch_model = RobertaModel(self.cfg)
        self.torch_model.eval()

        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.RobertaModel.from_torch(
            self.torch_model, self.test_device)
Example No. 9
    def __init__(self, config, *model_args, **model_kargs):
        super().__init__(config)
        self.model_args = model_kargs["model_args"]
        self.roberta = RobertaModel(config)

        if self.model_args.do_mlm:
            self.lm_head = RobertaLMHead(config)

        cl_init(self, config)
Example No. 10
    def __init__(self, config, dropout=0.1):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        self.roberta = RobertaModel(config)

        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.sub_num = [1]

        self.init_weights()
Example No. 11
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)

        config_copy = deepcopy(config)
        setattr(config_copy, "new_hidden_size",
                config.hidden_size + self.num_size)
        self.classifier = RobertaClassificationHead(config_copy)

        self.init_weights()
Example No. 12
    def __init__(self, config):
        super(PhoBertQueryNER, self).__init__(config)
        self.roberta = RobertaModel(config)

        # self.start_outputs = nn.Linear(config.hidden_size, 2)
        # self.end_outputs = nn.Linear(config.hidden_size, 2)
        self.start_outputs = nn.Linear(config.hidden_size, 1)
        self.end_outputs = nn.Linear(config.hidden_size, 1)
        self.span_embedding = MultiNonLinearClassifier(config.hidden_size * 2,
                                                       1, config.mrc_dropout)
        # self.span_embedding = SingleLinearClassifier(config.hidden_size * 2, 1)

        self.hidden_size = config.hidden_size

        self.init_weights()
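
config.mrc_dropout is not a standard RobertaConfig attribute; it has to be supplied by the surrounding training code. A hypothetical setup (an assumption, not the original project's config handling):

from transformers import RobertaConfig

# Attach the extra field the constructor above expects before building the
# model; PhoBertQueryNER and MultiNonLinearClassifier must be in scope.
config = RobertaConfig()
config.mrc_dropout = 0.1
model = PhoBertQueryNER(config)
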
Example No. 13
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        #self.dropout = nn.Dropout(config.hidden_dropout_prob)
        lstm_layer = 1
        self.lstm = nn.LSTM(input_size=config.hidden_size,
                            hidden_size=config.hidden_size,
                            num_layers=lstm_layer,
                            dropout=config.hidden_dropout_prob,
                            bidirectional=True)
        self.classifier = nn.Linear(config.hidden_size * 2, config.num_labels)

        self.init_weights()
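
A quick shape check illustrating why the classifier above takes config.hidden_size * 2: a bidirectional LSTM concatenates the forward and backward states, doubling the feature dimension.

import torch
import torch.nn as nn

hidden_size = 8
lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
               num_layers=1, bidirectional=True)
seq = torch.randn(10, 2, hidden_size)           # (seq_len, batch, features)
out, _ = lstm(seq)
assert out.shape == (10, 2, hidden_size * 2)    # forward + backward states
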
Example No. 14
    def __init__(self, config, args, intent_label_lst, slot_label_lst):
        super(JointRoberta, self).__init__(config)
        self.args = args
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        self.roberta = RobertaModel(config=config)  # Load pretrained RoBERTa backbone
        self.intent_classifier = IntentClassifier(config.hidden_size,
                                                  self.num_intent_labels,
                                                  args.dropout_rate)
        self.slot_classifier = SlotClassifier(
            config.hidden_size, self.num_intent_labels, self.num_slot_labels,
            self.args.use_intent_context_concat,
            self.args.use_intent_context_attention, self.args.max_seq_len,
            self.args.intent_embedding_size,
            self.args.attention_embedding_size, self.args.attention_type,
            args.dropout_rate)
        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
Example No. 15
def test(use_cuda):
    torch.set_grad_enabled(False)
    torch.set_num_threads(4)
    turbo_transformers.set_num_threads(4)

    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')

    cfg = RobertaConfig()
    torch_model = RobertaModel(cfg)
    torch_model.eval()

    if torch.cuda.is_available():
        torch_model.to(test_device)

    turbo_model = turbo_transformers.RobertaModel.from_torch(
        torch_model, test_device)

    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(1, 10),
                              dtype=torch.long,
                              device=test_device)

    torch_result = torch_model(input_ids)
    torch_result_final = torch_result[0].cpu().numpy()

    turbo_result = turbo_model(input_ids)
    turbo_result_final = turbo_result[0].cpu().numpy()

    # See the differences
    # print(numpy.size(torch_result_final), numpy.size(turbo_result_final))
    print(torch_result_final - turbo_result_final)
    assert (numpy.allclose(torch_result_final,
                           turbo_result_final,
                           atol=1e-3,
                           rtol=1e-3))
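
A plausible entry point for the standalone test above, mirroring the CUDA check used by test_Roberta_model earlier on this page:

if __name__ == "__main__":
    # Only exercise the GPU path when both PyTorch and turbo_transformers
    # were built with CUDA support.
    if torch.cuda.is_available() and \
            turbo_transformers.config.is_compiled_with_cuda():
        test(use_cuda=True)
    test(use_cuda=False)
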
class CnlpRobertaForClassification(RobertaPreTrainedModel):
    config_class = RobertaConfig
    base_model_prefix = "roberta"

    def __init__(self,
                 config,
                 num_labels_list,
                 layer=-1,
                 freeze=False,
                 tokens=False,
                 tagger=False,
                 relations=False,
                 num_attention_heads=12,
                 class_weights=None):
        super().__init__(config)
        self.num_labels = num_labels_list

        self.roberta = RobertaModel(config)

        if freeze:
            for param in self.roberta.parameters():
                param.requires_grad = False

        self.feature_extractors = nn.ModuleList()
        self.classifiers = nn.ModuleList()

        for task_ind, task_num_labels in enumerate(num_labels_list):
            self.feature_extractors.append(
                RepresentationProjectionLayer(
                    config,
                    layer=layer,
                    tokens=tokens,
                    tagger=tagger[task_ind],
                    relations=relations[task_ind],
                    num_attention_heads=num_attention_heads))
            if relations[task_ind]:
                self.classifiers.append(
                    ClassificationHead(config,
                                       task_num_labels,
                                       hidden_size=num_attention_heads))
            else:
                self.classifiers.append(
                    ClassificationHead(config, task_num_labels))

        # Are we operating as a sequence classifier (1 label per input sequence) or a tagger (1 label per input token in the sequence)?
        self.tagger = tagger
        self.relations = relations

        if class_weights is None:
            self.class_weights = [None] * len(self.classifiers)
        else:
            self.class_weights = class_weights

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        event_tokens=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
            Labels for computing the sequence classification/regression loss.
            Indices should be in :obj:`[0, ..., config.num_labels - 1]`.
            If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """

        outputs = self.roberta(input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
                               head_mask=head_mask,
                               inputs_embeds=inputs_embeds,
                               output_attentions=output_attentions,
                               output_hidden_states=True,
                               return_dict=True)

        batch_size, seq_len = input_ids.shape

        logits = []

        loss = None
        task_label_ind = 0

        for task_ind, task_num_labels in enumerate(self.num_labels):
            features = self.feature_extractors[task_ind](outputs.hidden_states,
                                                         event_tokens)
            task_logits = self.classifiers[task_ind](features)
            logits.append(task_logits)

            if labels is not None:
                if task_num_labels == 1:
                    #  We are doing regression
                    loss_fct = MSELoss()
                    task_loss = loss_fct(task_logits.view(-1), labels.view(-1))
                else:
                    if self.class_weights[task_ind] is not None:
                        class_weights = torch.FloatTensor(
                            self.class_weights[task_ind]).to(self.device)
                    else:
                        class_weights = None
                    loss_fct = CrossEntropyLoss(weight=class_weights)

                    if self.relations[task_ind]:
                        task_labels = labels[:, 0,
                                             task_label_ind:task_label_ind +
                                             seq_len, :]
                        task_label_ind += seq_len
                        task_loss = loss_fct(
                            task_logits.permute(0, 3, 1, 2),
                            task_labels.type(torch.LongTensor).to(
                                labels.device))
                    else:
                        task_labels = labels[:, 0, task_label_ind, :]
                        task_label_ind += 1
                        task_loss = loss_fct(
                            task_logits.view(-1, task_num_labels),
                            task_labels.reshape([
                                batch_size * seq_len,
                            ]).type(torch.LongTensor).to(labels.device))

                if loss is None:
                    loss = task_loss
                else:
                    loss += task_loss

#         if not return_dict:
#             output = (logits,) + outputs[2:]
#             return ((loss,) + output) if loss is not None else output

        if self.training:
            return SequenceClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )
        else:
            return SequenceClassifierOutput(loss=loss, logits=logits)
Example No. 17
import torch
from transformers.models.roberta.modeling_roberta import RobertaModel

from lambert.model import LambertModel

BATCH_SIZE = 4
SEQUENCE_LENGTH = 32

roberta = RobertaModel.from_pretrained('roberta-base')
lambert = LambertModel(roberta)

input_ids = torch.randint(0, 100, (BATCH_SIZE, SEQUENCE_LENGTH))
bboxes = torch.rand((BATCH_SIZE, SEQUENCE_LENGTH, 4))

lambert_output = lambert(input_ids=input_ids, bboxes=bboxes)
lambert_encoding = lambert_output.last_hidden_state

assert lambert_encoding.shape == (BATCH_SIZE, SEQUENCE_LENGTH, roberta.config.hidden_size)
assert lambert_encoding.dtype == torch.float