def __init__(self, pretrained_model, vocab_size):
     super().__init__()
     self.vocab_size = vocab_size
     config = BertConfig(vocab_size=vocab_size)
     self.bert = BertModel.from_pretrained(pretrained_model,
                                           return_dict=True)
     self.decoder = BertLMPredictionHead(config)
     self.decoder.decoder.weight.data = self.bert.embeddings.word_embeddings.weight.data
     for param in self.bert.parameters():
         param.requires_grad = True
     for param in self.decoder.parameters():
         param.requires_grad = True
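A caveat worth noting in this snippet: BertConfig(vocab_size=vocab_size) builds a config with default hyperparameters, so the prediction head only matches the pretrained encoder when the checkpoint uses the default hidden size. Below is a minimal sketch, assuming the Hugging Face transformers package (v4-style import path) and the 'hfl/chinese-bert-wwm' checkpoint that appears in Example #6, which loads the head's config from the checkpoint itself and ties the output weights by sharing the parameter:

from transformers import BertConfig, BertModel
from transformers.models.bert.modeling_bert import BertLMPredictionHead

pretrained_model = 'hfl/chinese-bert-wwm'               # checkpoint name taken from Example #6
config = BertConfig.from_pretrained(pretrained_model)   # hidden_size etc. now match the checkpoint
bert = BertModel.from_pretrained(pretrained_model, return_dict=True)

decoder = BertLMPredictionHead(config)
# share the embedding matrix with the output projection: one Parameter
# (and one gradient) is used by both modules
decoder.decoder.weight = bert.embeddings.word_embeddings.weight
assert decoder.decoder.weight.data_ptr() == bert.embeddings.word_embeddings.weight.data_ptr()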
Example #2
    def __init__(self, config, num_features):
        super().__init__()
        self.predictions = BertLMPredictionHead(config)
        self.num_features = num_features
        self.feature_predictions = nn.ModuleDict()

        for feature in range(num_features):
            self.feature_predictions[f'feature_{feature+1}_prediction'] = BertForOneFeaturePredictionHead(config)
Example #3
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.lm_layer = BertLMPredictionHead(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.init_weights()
Example #4
 def __init__(self, config):
     super().__init__()
     self.predictions = BertLMPredictionHead(config)
     self.topic_treat_prediction = BertTopicTreatPredictionHead(config)
Example #5
File: modeling.py Project: oriram/bert
 def __init__(self, config):
     super().__init__()
     self.predictions = BertLMPredictionHead(config)
     self.span_predictions = SpanPredictionHead(config)
Example #6
class UniLMModel(nn.Module):
    def __init__(self, pretrained_model, vocab_size):
        super().__init__()
        self.vocab_size = vocab_size
        config = BertConfig(vocab_size=vocab_size)
        self.bert = BertModel.from_pretrained(pretrained_model,
                                              return_dict=True)
        self.decoder = BertLMPredictionHead(config)
        self.decoder.decoder.weight.data = self.bert.embeddings.word_embeddings.weight.data
        for param in self.bert.parameters():
            param.requires_grad = True
        for param in self.decoder.parameters():
            param.requires_grad = True

    def forward(self, input_ids, token_type_ids, compute_loss=False):
        """
        参数:
            input_ids:一个batch的输入token id,用[PAD]作为填充符号
            token_type_ids:指示这个句子归属于setence1还是sentence2
            compute_loss:如果为True,则在forward的最后计算loss,计算方式为交叉熵
        
        使用方法:
            pretrained_model = 'hfl/chinese-bert-wwm'
            tokenizer = BertTokenizer.from_pretrained(pretrained_model)
            model = UniLMModel(pretrained_model, tokenizer.vocab_size)
            
            question = ['小明有3个苹果,小红的苹果是小明的3倍,小红有几个苹果?', '正方形的周长为12,它的边长是多少']
            equation = ['3*5', '12/4']
            
            tokenized = tokenizer(question, equation, return_tensors='pt', padding=True)
            input_ids = tokenized['input_ids']
            token_type_ids = tokenized['token_type_ids']
            
            # 直接调用forward进行推导
            logits = model(input_ids, token_type_ids, compute_loss=False)
            
            # 用于训练
            optimizer = nn.optimizer.Adam(model.parameters(), lr=2e-5)
            loss, logits = model(input_ids, token_type_ids, compute_loss=False)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        """
        # calc_attention_mask builds the UniLM-style seq2seq attention mask
        mask = calc_attention_mask(token_type_ids)
        encoder_output = self.bert(input_ids,
                                   token_type_ids=token_type_ids,
                                   attention_mask=mask)
        # return_dict=True, so take the last-layer hidden states from the output object
        sequence_output = encoder_output.last_hidden_state
        logits = self.decoder(sequence_output)
        if compute_loss:
            # labels: tokens of the target segment (token_type_ids == 1)
            labels = input_ids.view(-1)[token_type_ids.view(-1) == 1]
            # logits at position i predict token i + 1, so drop the last step
            logits = logits[:, :-1].contiguous()
            # and shift the target mask by one position to stay aligned
            target_mask = token_type_ids[:, 1:].contiguous().view(-1)
            predictions = logits.view(-1, self.vocab_size)
            predictions = predictions[target_mask == 1]
            loss_function = nn.CrossEntropyLoss(ignore_index=0)
            loss = loss_function(predictions, labels)
            return loss, logits
        return logits

    def generate(self, text, beam_size=1):
        """根据问题生成表达式
        
        输入:
            text:问题的文本
            beam_size:beam search过程中的top k值
        """
        # tokenizer, max_question_length, max_equation_length and device are
        # assumed to be module-level globals here
        tokenized = tokenizer(text,
                              max_length=max_question_length,
                              truncation=True,
                              return_tensors='pt')
        token_ids = tokenized['input_ids'].to(device)
        token_type_ids = tokenized['token_type_ids'].to(device)

        token_ids = token_ids.view(1, -1)
        token_type_ids = token_type_ids.view(1, -1)
        out_puts_ids = self.beam_search(token_ids,
                                        token_type_ids,
                                        beam_size=beam_size,
                                        max_length=max_equation_length)
        # strip the trailing [SEP]
        out_puts_ids = out_puts_ids[:-1]
        decoding_text = tokenizer.decode(out_puts_ids.cpu().numpy(),
                                         spaces_between_special_tokens=False)
        decoding_text = decoding_text.replace(' ', '')
        return decoding_text

    def beam_search(self, token_ids, token_type_ids, max_length, beam_size=1):
        """beam-search操作
        
        这里的max_length指的是生成序列的最大长度
        """

        # holds the generated output ids
        output_ids = torch.empty(1, 0, dtype=torch.long).to(device)
        # holds the cumulative log-probability scores

        with torch.no_grad():
            output_scores = torch.zeros(token_ids.shape[0]).to(device)
            for step in range(max_length):
                if step == 0:
                    scores = self.forward(token_ids, token_type_ids)
                    # repeat the input ids beam_size times
                    token_ids = token_ids.view(1, -1).repeat(beam_size, 1)
                    token_type_ids = token_type_ids.view(1, -1).repeat(
                        beam_size, 1)
                else:
                    scores = self.forward(new_input_ids, new_token_type_ids)

                logit_score = torch.log_softmax(scores[:, -1], dim=-1)

                logit_score = output_scores.view(-1, 1) + logit_score  # cumulative scores
                # flatten to (beam_size * vocab_size,) before taking the top k
                logit_score = logit_score.view(-1)
                hype_score, hype_pos = torch.topk(logit_score, beam_size)
                indice1 = (hype_pos // scores.shape[-1])  # row (beam) index
                indice2 = (hype_pos % scores.shape[-1]).long().reshape(
                    -1, 1)  # column (token) index

                # update the running scores
                output_scores = hype_score
                output_ids = torch.cat([output_ids[indice1], indice2],
                                       dim=1).long()
                new_input_ids = torch.cat([token_ids, output_ids], dim=1)
                new_token_type_ids = torch.cat(
                    [token_type_ids,
                     torch.ones_like(output_ids).to(device)],
                    dim=1)

                end_counts = (output_ids == tokenizer.sep_token_id).sum(
                    1)  # count generated [SEP] tokens per beam
                best_one = output_scores.argmax()
                if end_counts[best_one] == 1:
                    # the highest-scoring beam has produced [SEP]: stop
                    return output_ids[best_one]
                else:
                    # keep only the unfinished beams
                    flag = (end_counts < 1)  # marks unfinished sequences
                    if not flag.all():  # if any beam has finished
                        token_ids = token_ids[flag]
                        token_type_ids = token_type_ids[flag]
                        new_input_ids = new_input_ids[flag]
                        new_token_type_ids = new_token_type_ids[flag]
                        output_ids = output_ids[flag]  # drop finished sequences
                        output_scores = output_scores[flag]  # drop finished scores
                        end_counts = end_counts[flag]  # drop finished end counts
                        beam_size = int(flag.sum())  # shrink the beam size accordingly

            return output_ids[output_scores.argmax()]
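The forward pass above relies on a calc_attention_mask helper that is not shown on this page. Here is a minimal sketch of a UniLM-style seq2seq mask, assuming token_type_ids is 0 on the question segment and 1 on the equation segment; this is an assumption about the missing helper, not the project's own implementation:

import torch

def calc_attention_mask(token_type_ids):
    """UniLM-style seq2seq attention mask (sketch).

    Question tokens (token_type_ids == 0) attend to the whole question;
    equation tokens (token_type_ids == 1) attend to the question and to
    earlier equation tokens only. Returns a (batch, seq_len, seq_len)
    float mask with 1 where attention is allowed, which BertModel can
    broadcast over attention heads.
    """
    seq_len = token_type_ids.size(1)
    # lower-triangular (causal) part, applied inside the target segment
    causal = torch.tril(torch.ones(1, seq_len, seq_len,
                                   device=token_type_ids.device))
    segment = token_type_ids.unsqueeze(1).float()  # (batch, 1, seq_len), key axis
    # a key is visible if it lies in the question, or is causally before the query;
    # note that [PAD] positions are not excluded here and would need separate handling
    return (1.0 - segment) + segment * causal

With a mask of this shape the question side behaves like a bidirectional encoder while the equation side is decoded left to right, which is what lets a single BERT act as both encoder and decoder.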
Example #7
 def __init__(self, config, v_feature_size):
     super(MMPreTrainingHeads, self).__init__()
     self.predictions = BertLMPredictionHead(config)
     self.bi_seq_relationship = nn.Linear(config.hidden_size, 2)
     self.imagePredictions = BertImagePredictionHead(config, v_feature_size)
Example #8
 def add_lm_head(self):
     self.lm_head = BertLMPredictionHead(self.bert.config)
     # tie the output projection to the input embeddings; nn.Linear stores its
     # weight as (vocab_size, hidden_size), the same layout as the embedding
     # table, so the Parameter can be shared directly without a transpose
     self.lm_head.decoder.weight = self.bert.get_input_embeddings().weight
Example #9
 def __init__(self, config):
     super(BertMLMPreTrainingHeads, self).__init__()
     self.predictions = BertLMPredictionHead(config)
Example #10
 def __init__(self, name, config):
     super().__init__(name)
     self.loss = CrossEntropyLoss(ignore_index=-1)
     self.vocab_size = config.vocab_size
     self.masked_lm_head = BertLMPredictionHead(config)
Example #11
 def __init__(self, config):
     super().__init__()
     self.predictions = BertLMPredictionHead(config)
     self.span_predictions = QuestionAwareSpanSelectionHead(config)
Example #12
 def __init__(self, config):
     super().__init__()
     self.predictions = BertLMPredictionHead(config)
     self.sentiment_classification = nn.Linear(config.hidden_size, 3)
Example #13
 def __init__(self, config):
     super(BertIMAwControlPreTrainingHeads, self).__init__()
     self.predictions = BertLMPredictionHead(config)
     self.adj_predictions = BertIMAPredictionHead(config)
     self.pos_tagging = BertTokenClassificationHead(config)
Example #14
 def __init__(self, config):
     super().__init__()
     self.predictions = BertLMPredictionHead(config)
     self.genderace_prediction = BertGendeRacePredictionHead(config)
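All of the snippets on this page wire BertLMPredictionHead into larger models. As a standalone reference, here is a small sketch of what the head consumes and produces, with shapes as in the Hugging Face implementation; the default BertConfig() is only used to keep the example self-contained:

import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertLMPredictionHead

config = BertConfig()                                    # default bert-base sized config
head = BertLMPredictionHead(config)

hidden_states = torch.randn(2, 16, config.hidden_size)  # (batch, seq_len, hidden_size)
logits = head(hidden_states)                             # dense + activation + LayerNorm, then projection to the vocabulary
print(logits.shape)                                      # torch.Size([2, 16, 30522]) -> (batch, seq_len, vocab_size)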