Example #1
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers import RobertaConfig, RobertaModel


class roBerta(nn.Module):
    def __init__(self, config, num=0):
        super(roBerta, self).__init__()
        model_config = RobertaConfig()
        model_config.vocab_size = config.vocab_size
        model_config.hidden_size = config.hidden_size[0]
        model_config.num_attention_heads = 16
        # how the loss is computed (e.g. 'binary', 'focal_loss', 'ghmc')
        self.loss_method = config.loss_method
        self.multi_drop = config.multi_drop

        self.roberta = RobertaModel(model_config)
        if config.requires_grad:
            for param in self.roberta.parameters():
                param.requires_grad = True

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.hidden_size = config.hidden_size[num]
        if self.loss_method in ['binary', 'focal_loss', 'ghmc']:
            self.classifier = nn.Linear(self.hidden_size, 1)
        else:
            # the number of labels is assumed to come from the config object
            self.classifier = nn.Linear(self.hidden_size, config.num_labels)
        self.text_linear = nn.Linear(config.embeding_size,
                                     config.hidden_size[0])
        self.vocab_layer = nn.Linear(config.hidden_size[0], config.vocab_size)

        self.classifier.apply(self._init_weights)
        self.roberta.apply(self._init_weights)
        self.text_linear.apply(self._init_weights)
        self.vocab_layer.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                inputs=None,
                attention_mask=None,
                output_id=None,
                labels=None):
        inputs = torch.relu(self.text_linear(inputs))
        bert_outputs = self.roberta(inputs_embeds=inputs,
                                    attention_mask=attention_mask)

        # calculate the MLM (masked language modeling) loss
        last_hidden_state = bert_outputs[0]
        output_id_tmp = output_id[output_id.ne(-100)]
        output_id_emb = last_hidden_state[output_id.ne(-100)]
        pre_score = self.vocab_layer(output_id_emb)
        loss_cro = CrossEntropyLoss()
        # CrossEntropyLoss expects raw logits, so pre_score is passed without an activation
        mlm_loss = loss_cro(pre_score, output_id_tmp)

        labels_bool = labels.ne(-1)
        if labels_bool.sum().item() == 0:
            return mlm_loss, torch.tensor([])

        # calculate the classification (label) loss
        pooled_output = bert_outputs[1]
        out = self.classifier(pooled_output)
        out = out[labels_bool]
        labels_tmp = labels[labels_bool]
        # compute_loss is a project-specific helper that is not shown in this example
        label_loss = compute_loss(out, labels_tmp)
        out = torch.sigmoid(out).flatten()
        return mlm_loss + label_loss, out

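A minimal usage sketch for the model above, assuming a hypothetical argparse-style config object that carries the fields the constructor reads and that the project's compute_loss helper is available; the names and values below are illustrative, not taken from the original source.

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    vocab_size=30000,        # assumed vocabulary size
    hidden_size=[768],       # indexed as hidden_size[0] / hidden_size[num] in __init__
    loss_method='binary',    # selects the 1-unit classifier head
    multi_drop=1,
    requires_grad=True,
    hidden_dropout_prob=0.1,
    embeding_size=312,       # assumed dimension of the precomputed input embeddings
)

model = roBerta(config)
batch, seq_len = 2, 16
inputs = torch.randn(batch, seq_len, config.embeding_size)
attention_mask = torch.ones(batch, seq_len)
output_id = torch.full((batch, seq_len), -100)   # -100 marks positions ignored by the MLM loss
output_id[:, 0] = 5                              # pretend one masked token per sequence
labels = torch.zeros(batch)                      # binary targets; -1 would mark unlabeled rows

loss, probs = model(inputs=inputs, attention_mask=attention_mask,
                    output_id=output_id, labels=labels)
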
Example #2
class ClassifyModel(nn.Module):
    def __init__(self, args):
        super(ClassifyModel, self).__init__()
        args.out_size = len(args.dense_features)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.args = args

        # Build the BERT (RoBERTa) model and load the pretrained weights
        config = RobertaConfig.from_pretrained(args.pretrained_model_path)
        config.output_hidden_states = True
        args.hidden_size = config.hidden_size
        args.num_hidden_layers = config.num_hidden_layers
        self.bert_text_layer = RobertaModel.from_pretrained(args.pretrained_model_path, config=config)
        self.text_linear = nn.Linear(in_features=args.text_dim + args.vocab_dim_v1 * len(args.text_features),
                                     out_features=args.hidden_size)
        logger.info("Load linear from %s", os.path.join(args.pretrained_model_path, "linear.bin"))
        self.text_linear.load_state_dict(torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
        logger.info("Load embeddings from %s", os.path.join(args.pretrained_model_path, "embeddings.bin"))

        self.text_embeddings = nn.Embedding.from_pretrained(
            torch.load(os.path.join(args.pretrained_model_path, "embeddings.bin"))['weight'],
            freeze=True)
        args.out_size += args.hidden_size * 2

        # Build the fusion-layer model, randomly initialized
        config = RobertaConfig()
        config.num_hidden_layers = 4
        config.intermediate_size = 2048
        config.hidden_size = 512
        config.num_attention_heads = 16
        config.vocab_size = 5
        self.fusion_text_layer = RobertaModel(config=config)
        self.fusion_text_layer.apply(self._init_weights)
        self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
        self.text_linear_1.apply(self._init_weights)
        self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
        args.out_size += 1024

        # Build the classifier, randomly initialized
        self.classifierHead = ClassificationHead(args)
        self.classifierHead.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                dense_features,
                text_features,
                text_ids,
                text_masks,
                fusion_text_features,
                fusion_text_masks,
                labels=None):

        outputs = []
        # Dense float features go straight into the classifier input
        outputs.append(dense_features.float())

        # Take the BERT hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks = text_masks.float()
        text_embedding = self.text_embeddings(text_ids).view(text_ids.size(0), text_ids.size(1), -1)  # reshape
        text_features = torch.cat((text_features.float(), text_embedding), -1)  # concat
        text_features = torch.relu(self.text_linear(self.dropout(text_features)))  # relu
        hidden_states = self.bert_text_layer(inputs_embeds=text_features, attention_mask=text_masks)[0]  # bert_text_layer

        embed_mean = (hidden_states * text_masks.unsqueeze(-1)).sum(1) / text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        # mean- and max-pooled BERT embeddings as classifier inputs
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Take the fusion-layer hidden states and apply max pooling and mean pooling as classifier inputs
        fusion_text_masks = fusion_text_masks.float()
        fusion_text_features = torch.cat((fusion_text_features.float(), hidden_states), -1)
        batch, seq_length, embedding_dim = fusion_text_features.size()
        fusion_text_features = self.norm(fusion_text_features.view(-1, embedding_dim))\
            .view(batch, seq_length, embedding_dim)
        fusion_text_features = torch.relu(self.text_linear_1(fusion_text_features))
        hidden_states = self.fusion_text_layer(inputs_embeds=fusion_text_features,
                                               attention_mask=fusion_text_masks)[0]  # transformer fusion
        embed_mean = (hidden_states * fusion_text_masks.unsqueeze(-1)).sum(1) / fusion_text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - fusion_text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Feed the features (dense + BERT max/mean pooling + fusion layer) to the classifier to get 20-way logits
        # 10 age classes x 2 gender classes = 20 joint classes
        final_hidden_state = torch.cat(outputs, dim=-1)
        logits = self.classifierHead(final_hidden_state)

        # Return the loss (training) or the probabilities (inference)
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss
        else:
            # prob: [batch, 20] joint distribution over (age, gender)
            prob = torch.softmax(logits, -1)
            # age_probs: [batch, 10]; summing the two genders within each age gives that age's probability
            age_probs = prob.view(-1, 10, 2).sum(dim=2)
            # gender_probs: [batch, 2]; summing over the ten ages gives each gender's probability
            gender_probs = prob.view(-1, 10, 2).sum(dim=1)
            return age_probs, gender_probs
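The mean and max pooling above mask out padding positions before reducing over the sequence dimension. A small self-contained sketch of that pattern (the function name and shapes are illustrative, not part of the original code):

import torch


def masked_mean_max(hidden_states, masks):
    # hidden_states: [batch, seq_len, dim]; masks: [batch, seq_len] with 1 for real
    # tokens and 0 for padding, mirroring the pooling in ClassifyModel.forward
    masks = masks.float()
    # mean over valid positions only
    mean = (hidden_states * masks.unsqueeze(-1)).sum(1) / masks.sum(1).unsqueeze(-1)
    # push padded positions to -1e10 so they never win the max
    maxed = (hidden_states + (1 - masks).unsqueeze(-1) * (-1e10)).max(1)[0]
    return mean, maxed


h = torch.randn(2, 5, 8)
m = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])
mean_pooled, max_pooled = masked_mean_max(h, m)   # both have shape [2, 8]
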
Example #3
class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        args.out_size = len(args.dense_features)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.args = args

        # Build the BERT (RoBERTa) model and load the pretrained weights
        config = RobertaConfig.from_pretrained(args.pretrained_model_path)
        config.output_hidden_states = True
        args.hidden_size = config.hidden_size
        args.num_hidden_layers = config.num_hidden_layers
        self.text_layer = RobertaModel.from_pretrained(
            args.pretrained_model_path, config=config)
        self.text_linear = nn.Linear(
            args.text_dim + args.vocab_dim_v1 * len(args.text_features),
            args.hidden_size)
        logger.info("Load linear from %s",
                    os.path.join(args.pretrained_model_path, "linear.bin"))
        self.text_linear.load_state_dict(
            torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
        logger.info("Load embeddings from %s",
                    os.path.join(args.pretrained_model_path, "embeddings.bin"))
        self.text_embeddings = nn.Embedding.from_pretrained(torch.load(
            os.path.join(args.pretrained_model_path,
                         "embeddings.bin"))['weight'],
                                                            freeze=True)
        args.out_size += args.hidden_size * 2

        # Build the decoder model, randomly initialized
        config = RobertaConfig()
        config.num_hidden_layers = 4
        config.intermediate_size = 2048
        config.hidden_size = 512
        config.num_attention_heads = 16
        config.vocab_size = 5
        self.text_layer_1 = RobertaModel(config=config)
        self.text_layer_1.apply(self._init_weights)
        self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
        self.text_linear_1.apply(self._init_weights)
        self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
        args.out_size += 1024

        # Build the classifier, randomly initialized
        self.classifier = ClassificationHead(args)
        self.classifier.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self,
                dense_features,
                text_features,
                text_ids,
                text_masks,
                text_features_1,
                text_masks_1,
                labels=None):
        outputs = []
        # Dense float features go straight into the classifier input
        outputs.append(dense_features.float())
        # Take the BERT hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks = text_masks.float()
        text_embedding = self.text_embeddings(text_ids).view(
            text_ids.size(0), text_ids.size(1), -1)
        text_features = torch.cat((text_features.float(), text_embedding), -1)
        text_features = torch.relu(
            self.text_linear(self.dropout(text_features)))
        hidden_states = self.text_layer(inputs_embeds=text_features,
                                        attention_mask=text_masks)[0]
        embed_mean = (hidden_states * text_masks.unsqueeze(-1)
                      ).sum(1) / text_masks.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)
        # Take the decoder hidden states and apply max pooling and mean pooling as classifier inputs
        text_masks_1 = text_masks_1.float()
        text_features_1 = torch.cat((text_features_1.float(), hidden_states),
                                    -1)
        bs, le, dim = text_features_1.size()
        text_features_1 = self.norm(text_features_1.view(-1, dim)).view(
            bs, le, dim)
        text_features_1 = torch.relu(self.text_linear_1(text_features_1))
        hidden_states = self.text_layer_1(inputs_embeds=text_features_1,
                                          attention_mask=text_masks_1)[0]
        embed_mean = (hidden_states * text_masks_1.unsqueeze(-1)
                      ).sum(1) / text_masks_1.sum(1).unsqueeze(-1)
        embed_mean = embed_mean.float()
        embed_max = hidden_states + (1 - text_masks_1).unsqueeze(-1) * (-1e10)
        embed_max = embed_max.max(1)[0].float()
        outputs.append(embed_mean)
        outputs.append(embed_max)

        # Feed the features to the classifier to get 20-way logits
        final_hidden_state = torch.cat(outputs, -1)
        logits = self.classifier(final_hidden_state)

        # Return the loss (training) or the probabilities (inference)
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss
        else:
            prob = torch.softmax(logits, -1)
            age_probs = prob.view(-1, 10, 2).sum(2)
            gender_probs = prob.view(-1, 10, 2).sum(1)
            return age_probs, gender_probs
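Both ClassifyModel and Model delegate the final 20-way prediction to a ClassificationHead that is not shown in these examples. A plausible minimal sketch, under the assumption that it is a simple MLP over the accumulated args.out_size features (the real project may differ):

import torch
import torch.nn as nn


class ClassificationHead(nn.Module):
    # Hypothetical head; the actual implementation is not included in the examples above.
    def __init__(self, args):
        super(ClassificationHead, self).__init__()
        # args.out_size is accumulated in the model constructors:
        # dense features + 2 * hidden_size (BERT pooling) + 1024 (fusion/decoder pooling)
        self.dense = nn.Linear(args.out_size, args.out_size)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.out_proj = nn.Linear(args.out_size, 20)   # 10 ages x 2 genders = 20 joint classes

    def forward(self, features):
        x = self.dropout(features)
        x = torch.tanh(self.dense(x))
        x = self.dropout(x)
        return self.out_proj(x)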