Example #1
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers import RobertaConfig, RobertaModel


class roBerta(nn.Module):
    def __init__(self, config, num=0):
        super(roBerta, self).__init__()
        model_config = RobertaConfig()
        model_config.vocab_size = config.vocab_size
        model_config.hidden_size = config.hidden_size[0]
        model_config.num_attention_heads = 16
        # how the loss is computed (e.g. 'binary', 'focal_loss', 'ghmc')
        self.loss_method = config.loss_method
        self.multi_drop = config.multi_drop

        self.roberta = RobertaModel(model_config)
        if config.requires_grad:
            for param in self.roberta.parameters():
                param.requires_grad = True

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size[num]
        if self.loss_method in ['binary', 'focal_loss', 'ghmc']:
            self.classifier = nn.Linear(self.hidden_size, 1)
        else:
            # the number of labels is assumed to come from the config object
            self.classifier = nn.Linear(self.hidden_size, config.num_labels)
        self.text_linear = nn.Linear(config.embeding_size,
                                     config.hidden_size[0])
        self.vocab_layer = nn.Linear(config.hidden_size[0], config.vocab_size)

        self.classifier.apply(self._init_weights)
        self.roberta.apply(self._init_weights)
        self.text_linear.apply(self._init_weights)
        self.vocab_layer.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                inputs=None,
                attention_mask=None,
                output_id=None,
                labels=None):
        inputs = torch.relu(self.text_linear(inputs))
        bert_outputs = self.roberta(inputs_embeds=inputs,
                                    attention_mask=attention_mask)

        # calculate the MLM (masked language modeling) loss
        last_hidden_state = bert_outputs[0]
        output_id_tmp = output_id[output_id.ne(-100)]
        output_id_emb = last_hidden_state[output_id.ne(-100)]
        pre_score = self.vocab_layer(output_id_emb)
        loss_cro = CrossEntropyLoss()
        # CrossEntropyLoss expects raw logits, so pre_score is passed without an activation
        mlm_loss = loss_cro(pre_score, output_id_tmp)

        labels_bool = labels.ne(-1)
        if labels_bool.sum().item() == 0:
            return mlm_loss, torch.tensor([])

        # calculate the classification (label) loss
        pooled_output = bert_outputs[1]
        out = self.classifier(pooled_output)
        out = out[labels_bool]
        labels_tmp = labels[labels_bool]
        # compute_loss is a project-specific helper that is not shown in this example
        label_loss = compute_loss(out, labels_tmp)
        out = torch.sigmoid(out).flatten()
        return mlm_loss + label_loss, out

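A minimal usage sketch for the model above, assuming a hypothetical argparse-style config object that carries the fields the constructor reads and that the project's compute_loss helper is available; the names and values below are illustrative, not taken from the original source.

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    vocab_size=30000,        # assumed vocabulary size
    hidden_size=[768],       # indexed as hidden_size[0] / hidden_size[num] in __init__
    loss_method='binary',    # selects the 1-unit classifier head
    multi_drop=1,
    requires_grad=True,
    hidden_dropout_prob=0.1,
    embeding_size=312,       # assumed dimension of the precomputed input embeddings
)

model = roBerta(config)
batch, seq_len = 2, 16
inputs = torch.randn(batch, seq_len, config.embeding_size)
attention_mask = torch.ones(batch, seq_len)
output_id = torch.full((batch, seq_len), -100)   # -100 marks positions ignored by the MLM loss
output_id[:, 0] = 5                              # pretend one masked token per sequence
labels = torch.zeros(batch)                      # binary targets; -1 would mark unlabeled rows

loss, probs = model(inputs=inputs, attention_mask=attention_mask,
                    output_id=output_id, labels=labels)
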
Example #2
class ClassifyModel(nn.Module):
    def __init__(self, args):
        super(ClassifyModel, self).__init__()
        args.out_size = len(args.dense_features)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.args = args

        # Build the BERT (RoBERTa) model and load the pretrained weights
        config = RobertaConfig.from_pretrained(args.pretrained_model_path)
        config.output_hidden_states = True
        args.hidden_size = config.hidden_size
        args.num_hidden_layers = config.num_hidden_layers
        self.bert_text_layer = RobertaModel.from_pretrained(args.pretrained_model_path, config=config)
        self.text_linear = nn.Linear(in_features=args.text_dim + args.vocab_dim_v1 * len(args.text_features),
                                     out_features=args.hidden_size)
        logger.info("Load linear from %s", os.path.join(args.pretrained_model_path, "linear.bin"))
        self.text_linear.load_state_dict(torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
        logger.info("Load embeddings from %s", os.path.join(args.pretrained_model_path, "embeddings.bin"))

        self.text_embeddings = nn.Embedding.from_pretrained(
            torch.load(os.path.join(args.pretrained_model_path, "embeddings.bin"))['weight'],
            freeze=True)
        args.out_size += args.hidden_size * 2

        # Build the fusion-layer model, randomly initialized
        config = RobertaConfig()
        config.num_hidden_layers = 4
        config.intermediate_size = 2048
        config.hidden_size = 512
        config.num_attention_heads = 16
        config.vocab_size = 5
        self.fusion_text_layer = RobertaModel(config=config)
        self.fusion_text_layer.apply(self._init_weights)
        self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
        self.text_linear_1.apply(self._init_weights)
        self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
        args.out_size += 1024

        # Build the classifier, randomly initialized
        self.classifierHead = ClassificationHead(args)
        self.classifierHead.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                dense_features,
                text_features,
                text_ids,
                text_masks,
                fusion_text_features,
                fusion_text_masks,
                labels=None):

        outputs = []
        # Dense float features go straight into the classifier input
        outputs.append(dense_features.float())

        # Take the BERT hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks = text_masks.float()
        text_embedding = self.text_embeddings(text_ids).view(text_ids.size(0), text_ids.size(1), -1)  # reshape
        text_features = torch.cat((text_features.float(), text_embedding), -1)  # concat
        text_features = torch.relu(self.text_linear(self.dropout(text_features)))  # relu
        hidden_states = self.bert_text_layer(inputs_embeds=text_features, attention_mask=text_masks)[0]  # bert_text_layer

        embed_mean = (hidden_states * text_masks.unsqueeze(-1)).sum(1) / text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        # mean- and max-pooled BERT embeddings as classifier inputs
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Take the fusion-layer hidden states and apply max pooling and mean pooling as classifier inputs
        fusion_text_masks = fusion_text_masks.float()
        fusion_text_features = torch.cat((fusion_text_features.float(), hidden_states), -1)
        batch, seq_length, embedding_dim = fusion_text_features.size()
        fusion_text_features = self.norm(fusion_text_features.view(-1, embedding_dim))\
            .view(batch, seq_length, embedding_dim)
        fusion_text_features = torch.relu(self.text_linear_1(fusion_text_features))
        hidden_states = self.fusion_text_layer(inputs_embeds=fusion_text_features,
                                               attention_mask=fusion_text_masks)[0]  # transformer fusion
        embed_mean = (hidden_states * fusion_text_masks.unsqueeze(-1)).sum(1) / fusion_text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - fusion_text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Feed the features (dense + BERT max/mean pooling + fusion layer) to the classifier to get 20-way logits
        # 10 age classes x 2 gender classes = 20 joint classes
        final_hidden_state = torch.cat(outputs, dim=-1)
        logits = self.classifierHead(final_hidden_state)

        # Return the loss (training) or the probabilities (inference)
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss
        else:
            # prob: [batch, 20] joint distribution over (age, gender)
            prob = torch.softmax(logits, -1)
            # age_probs: [batch, 10]; summing the two genders within each age gives that age's probability
            age_probs = prob.view(-1, 10, 2).sum(dim=2)
            # gender_probs: [batch, 2]; summing over the ten ages gives each gender's probability
            gender_probs = prob.view(-1, 10, 2).sum(dim=1)
            return age_probs, gender_probs
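The mean and max pooling above mask out padding positions before reducing over the sequence dimension. A small self-contained sketch of that pattern (the function name and shapes are illustrative, not part of the original code):

import torch


def masked_mean_max(hidden_states, masks):
    # hidden_states: [batch, seq_len, dim]; masks: [batch, seq_len] with 1 for real
    # tokens and 0 for padding, mirroring the pooling in ClassifyModel.forward
    masks = masks.float()
    # mean over valid positions only
    mean = (hidden_states * masks.unsqueeze(-1)).sum(1) / masks.sum(1).unsqueeze(-1)
    # push padded positions to -1e10 so they never win the max
    maxed = (hidden_states + (1 - masks).unsqueeze(-1) * (-1e10)).max(1)[0]
    return mean, maxed


h = torch.randn(2, 5, 8)
m = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])
mean_pooled, max_pooled = masked_mean_max(h, m)   # both have shape [2, 8]
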
Example #3
class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        args.out_size = len(args.dense_features)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.args = args

        # Build the BERT (RoBERTa) model and load the pretrained weights
        config = RobertaConfig.from_pretrained(args.pretrained_model_path)
        config.output_hidden_states = True
        args.hidden_size = config.hidden_size
        args.num_hidden_layers = config.num_hidden_layers
        self.text_layer = RobertaModel.from_pretrained(
            args.pretrained_model_path, config=config)
        self.text_linear = nn.Linear(
            args.text_dim + args.vocab_dim_v1 * len(args.text_features),
            args.hidden_size)
        logger.info("Load linear from %s",
                    os.path.join(args.pretrained_model_path, "linear.bin"))
        self.text_linear.load_state_dict(
            torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
        logger.info("Load embeddings from %s",
                    os.path.join(args.pretrained_model_path, "embeddings.bin"))
        self.text_embeddings = nn.Embedding.from_pretrained(torch.load(
            os.path.join(args.pretrained_model_path,
                         "embeddings.bin"))['weight'],
                                                            freeze=True)
        args.out_size += args.hidden_size * 2

        # Build the decoder model, randomly initialized
        config = RobertaConfig()
        config.num_hidden_layers = 4
        config.intermediate_size = 2048
        config.hidden_size = 512
        config.num_attention_heads = 16
        config.vocab_size = 5
        self.text_layer_1 = RobertaModel(config=config)
        self.text_layer_1.apply(self._init_weights)
        self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
        self.text_linear_1.apply(self._init_weights)
        self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
        args.out_size += 1024

        # Build the classifier, randomly initialized
        self.classifier = ClassificationHead(args)
        self.classifier.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                dense_features,
                text_features,
                text_ids,
                text_masks,
                text_features_1,
                text_masks_1,
                labels=None):
        outputs = []
        # Dense float features go straight into the classifier input
        outputs.append(dense_features.float())
        # Take the BERT hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks = text_masks.float()
        text_embedding = self.text_embeddings(text_ids).view(
            text_ids.size(0), text_ids.size(1), -1)
        text_features = torch.cat((text_features.float(), text_embedding), -1)
        text_features = torch.relu(
            self.text_linear(self.dropout(text_features)))
        hidden_states = self.text_layer(inputs_embeds=text_features,
                                        attention_mask=text_masks)[0]
        embed_mean = (hidden_states * text_masks.unsqueeze(-1)
                      ).sum(1) / text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)
        # Take the decoder hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks_1 = text_masks_1.float()
        text_features_1 = torch.cat((text_features_1.float(), hidden_states),
                                    -1)
        bs, le, dim = text_features_1.size()
        text_features_1 = self.norm(text_features_1.view(-1, dim)).view(
            bs, le, dim)
        text_features_1 = torch.relu(self.text_linear_1(text_features_1))
        hidden_states = self.text_layer_1(inputs_embeds=text_features_1,
                                          attention_mask=text_masks_1)[0]
        embed_mean = (hidden_states * text_masks_1.unsqueeze(-1)
                      ).sum(1) / text_masks_1.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks_1).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Feed the features to the classifier to get 20-way logits
        final_hidden_state = torch.cat(outputs, -1)
        logits = self.classifier(final_hidden_state)

        # Return the loss (training) or the probabilities (inference)
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss
        else:
            prob = torch.softmax(logits, -1)
            age_probs = prob.view(-1, 10, 2).sum(2)
            gender_probs = prob.view(-1, 10, 2).sum(1)
            return age_probs, gender_probs
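Both ClassifyModel and Model delegate the final 20-way prediction to a ClassificationHead that is not shown in these examples. A plausible minimal sketch, under the assumption that it is a simple MLP over the accumulated args.out_size features (the real project may differ):

import torch
import torch.nn as nn


class ClassificationHead(nn.Module):
    # Hypothetical head; the actual implementation is not included in the examples above.
    def __init__(self, args):
        super(ClassificationHead, self).__init__()
        # args.out_size is accumulated in the model constructors:
        # dense features + 2 * hidden_size (BERT pooling) + 1024 (fusion/decoder pooling)
        self.dense = nn.Linear(args.out_size, args.out_size)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.out_proj = nn.Linear(args.out_size, 20)   # 10 ages x 2 genders = 20 joint classes

    def forward(self, features):
        x = self.dropout(features)
        x = torch.tanh(self.dense(x))
        x = self.dropout(x)
        return self.out_proj(x)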