import torch
import torch.nn as nn
from transformers import AlbertModel, AlbertPreTrainedModel
# Note: `crf` (used by AlbertLstmCrf below) is a project-local CRF layer; import
# it from wherever the surrounding repository defines it.


class AlbertCoherenceRank(AlbertPreTrainedModel):
    def __init__(self,
                 config,
                 sequence_length,
                 mlp_sizes=None,
                 mlp_dropout=0.5,
                 freeze_albert=True):
        super().__init__(config)
        self.albert = AlbertModel(config)
        if freeze_albert:
            for p in self.albert.parameters():
                p.requires_grad = False

        mlp_layers = list()
        input_dim = self.albert.config.hidden_size * sequence_length
        if not mlp_sizes:
            mlp_sizes = [1024, 512, 256]
        for mlp_size in mlp_sizes:
            mlp_layers.append(torch.nn.Linear(input_dim, mlp_size))
            mlp_layers.append(torch.nn.LeakyReLU())
            mlp_layers.append(torch.nn.Dropout(p=mlp_dropout))
            input_dim = mlp_size
        mlp_layers.append(torch.nn.Linear(input_dim, 1))
        self.mlp = torch.nn.Sequential(*mlp_layers)

    def forward(self, input_i, input_j, target=None, **kwargs):
        """
        Compute a pairwise ranking loss for two inputs, i and j. If target is not provided,
        input_i is assumed to rank higher (be more coherent) than input_j.
        :param input_i: text 1; if target is not set, this should have higher coherence
        :param input_j: text 2; if target is not set, this should have lower coherence
        :param target: indicates ranking direction: 1 if i > j, -1 otherwise; defaults to 1
        :return: pairwise logistic ranking loss, predicted score for i, predicted score for j
        """
        # Index [0] selects the last hidden state, shape (batch, seq_len, hidden_size);
        # it works for both tuple and ModelOutput return types.
        albert_out_i = self.albert(input_i)[0]
        predicted_i = self.mlp(torch.flatten(albert_out_i, start_dim=1))

        albert_out_j = self.albert(input_j)[0]
        predicted_j = self.mlp(torch.flatten(albert_out_j, start_dim=1))

        if target is None:
            target = torch.ones_like(predicted_i)
        else:
            # Reshape so target broadcasts element-wise with the (batch, 1) scores.
            target = target.to(predicted_i.dtype).view_as(predicted_i)

        # Pairwise logistic (RankNet-style) loss: -log(sigmoid((s_i - s_j) * target))
        loss = -((predicted_i - predicted_j) * target).sigmoid().log().mean()

        return loss, predicted_i, predicted_j
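
# Minimal usage sketch for AlbertCoherenceRank (not part of the original code).
# Assumptions: the "albert-base-v2" checkpoint, a sequence length of 128 and the
# two example sentences are illustrative; the ALBERT backbone here is randomly
# initialised, so the scores are only meaningful after loading or fine-tuning weights.
def _coherence_rank_example():
    from transformers import AlbertConfig, AlbertTokenizer

    tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
    config = AlbertConfig.from_pretrained("albert-base-v2")
    model = AlbertCoherenceRank(config, sequence_length=128)
    model.eval()

    def encode(text):
        # Pad/truncate to the fixed sequence_length the flattened MLP input expects.
        return tokenizer(text, padding="max_length", truncation=True,
                         max_length=128, return_tensors="pt")["input_ids"]

    input_i = encode("The cat sat on the mat. It purred quietly.")   # more coherent
    input_j = encode("Mat the quietly. Purred it sat cat the on.")   # less coherent

    with torch.no_grad():
        loss, score_i, score_j = model(input_i, input_j)
    print(loss.item(), score_i.item(), score_j.item())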
class AlbertLstmCrf(AlbertPreTrainedModel):
    def __init__(self, config, extra_config, ignore_ids):
        """
        config.num_labels : int, required
            Number of tags.
        extra_config.idx2tag : ``Dict[int, str]``, required
            A mapping {label_id -> label}. Example: {0: "B-LOC", 1: "I-LOC", 2: "O"}
        extra_config.label_encoding : ``str``, required
            Indicates which constraint the CRF applies. Current choices are
            "BIO", "IOB1", "BIOUL", "BMES" and "BIOES".
                B = Beginning
                I/M = Inside / Middle
                L/E = Last / End
                O = Outside
                U/W/S = Unit / Whole / Single
        ignore_ids : ``List[int]``, required
            Token ids (e.g. special tokens) whose positions are dropped before the CRF.
        """
        super(AlbertLstmCrf, self).__init__(config)
        self.pretrained = AlbertModel(config)
        self.dropout = nn.Dropout(extra_config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.bilstm = nn.LSTM(input_size=config.hidden_size,
                              hidden_size=config.hidden_size // 2,
                              batch_first=True,
                              num_layers=extra_config.num_layers,
                              dropout=extra_config.lstm_dropout,
                              bidirectional=True)
        # LayerNorm is defined here (not inside forward) so its parameters are
        # registered with the module and moved to the correct device.
        self.layer_norm = nn.LayerNorm(config.hidden_size)
        self.crf = crf(config.num_labels, extra_config.label_encoding,
                       extra_config.idx2tag)
        self.init_weights()
        if extra_config.freeze_pretrained:
            for param in self.pretrained.parameters():
                param.requires_grad = False

        self.ignore_ids = ignore_ids

    def forward(self,
                input_ids,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                labels=None):
        # Components of the pretrained model's outputs:
        # last_hidden_state: Sequence of hidden-states at the output of the last layer of the model.
        #                     (batch_size, sequence_length, hidden_size)
        # pooler_output:      Last layer hidden-state of the first token of the sequence (classification token)
        #                     processed by a Linear layer and a Tanh activation function.
        # hidden_states:     one for the output of the embeddings + one for the output of each layer.
        #                     each is (batch_size, sequence_length, hidden_size)
        # attentions:         Attentions weights after the attention softmax of each layer.
        #                     each is (batch_size, num_heads, sequence_length, sequence_length)
        outputs = self.pretrained(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        last_hidden_state = outputs[0]

        seq_output = self.dropout(last_hidden_state)
        seq_output, _ = self.bilstm(seq_output)
        seq_output = self.layer_norm(seq_output)
        logits = self.classifier(seq_output)

        outputs = (logits, ) + outputs[2:]

        if labels is not None:
            masked_labels, masked_logits = self._get_masked_inputs(
                input_ids, labels, logits, attention_mask)
            # mask=None: padded and ignored positions were already dropped above
            loss = self.crf(masked_logits, masked_labels, mask=None)
            outputs = (loss, ) + outputs

        # (loss), logits, (hidden_states), (attentions)
        return outputs

    def _get_masked_inputs(self, input_ids, label_ids, logits, attention_mask):
        ignore_ids = self.ignore_ids

        # Boolean indexing flattens (batch, seq_len, ...) into a single token
        # dimension, keeping only positions where attention_mask == 1.
        masked_ids = input_ids[(1 == attention_mask)]
        masked_labels = label_ids[(1 == attention_mask)]
        masked_logits = logits[(1 == attention_mask)]
        # Drop positions holding ignored token ids (e.g. [CLS], [SEP], [PAD]).
        for ignore_id in ignore_ids:
            masked_labels = masked_labels[(ignore_id != masked_ids)]
            masked_logits = masked_logits[(ignore_id != masked_ids)]
            masked_ids = masked_ids[(ignore_id != masked_ids)]

        return masked_labels, masked_logits
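
# Illustrative setup sketch for AlbertLstmCrf (not part of the original code).
# The tag set, dropout values, checkpoint name and ignore_ids below are
# assumptions; `crf` must still be importable from wherever the surrounding
# project defines its CRF layer (see the module-level note above).
def _lstm_crf_setup_example():
    from types import SimpleNamespace
    from transformers import AlbertConfig

    idx2tag = {0: "O", 1: "B-LOC", 2: "I-LOC", 3: "B-PER", 4: "I-PER"}
    extra_config = SimpleNamespace(
        hidden_dropout_prob=0.1,
        num_layers=2,            # >1 so the LSTM dropout below is actually applied
        lstm_dropout=0.2,
        label_encoding="BIO",
        idx2tag=idx2tag,
        freeze_pretrained=True,
    )
    config = AlbertConfig.from_pretrained("albert-base-v2", num_labels=len(idx2tag))
    # ignore_ids lists token ids whose positions are dropped before the CRF;
    # 2, 3 and 0 are ALBERT's [CLS], [SEP] and <pad> ids respectively.
    model = AlbertLstmCrf(config, extra_config, ignore_ids=[2, 3, 0])
    return model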