def __init__(self, config, num_features):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.num_features = num_features
    self.feature_predictions = nn.ModuleDict()
    for feature in range(num_features):
        self.feature_predictions[f'feature_{feature + 1}_prediction'] = \
            BertForOneFeaturePredictionHead(config)

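The snippet above defines only the constructor. A minimal forward sketch (the method body and argument names are assumptions, not part of the original) showing how the per-feature heads could be applied alongside the LM head:

def forward(self, sequence_output):
    # Hypothetical forward pass: LM scores over tokens, plus one score
    # tensor per feature head, all computed from the same encoder output.
    prediction_scores = self.predictions(sequence_output)
    feature_scores = {name: head(sequence_output)
                      for name, head in self.feature_predictions.items()}
    return prediction_scores, feature_scores
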
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.lm_layer = BertLMPredictionHead(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
    self.init_weights()

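A hedged forward sketch for the model above (assumed, not from the original): the LM head reads the per-token sequence output while the classifier reads the pooled [CLS] output.

def forward(self, input_ids, attention_mask=None, token_type_ids=None):
    # Hypothetical forward pass: outputs[0] is the per-token hidden states,
    # outputs[1] is the pooled [CLS] representation.
    outputs = self.bert(input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids)
    lm_scores = self.lm_layer(outputs[0])
    logits = self.classifier(self.dropout(outputs[1]))
    return lm_scores, logits
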
def __init__(self, config):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.topic_treat_prediction = BertTopicTreatPredictionHead(config)

def __init__(self, config):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.span_predictions = SpanPredictionHead(config)

import torch
from torch import nn
from transformers import BertConfig, BertModel, BertTokenizer
from transformers.models.bert.modeling_bert import BertLMPredictionHead


class UniLMModel(nn.Module):
    def __init__(self, pretrained_model, vocab_size):
        super().__init__()
        self.vocab_size = vocab_size
        config = BertConfig(vocab_size=vocab_size)
        self.bert = BertModel.from_pretrained(pretrained_model, return_dict=True)
        self.decoder = BertLMPredictionHead(config)
        # Tie the decoder projection to the input word embeddings.
        self.decoder.decoder.weight.data = self.bert.embeddings.word_embeddings.weight.data
        for param in self.bert.parameters():
            param.requires_grad = True
        for param in self.decoder.parameters():
            param.requires_grad = True

    def forward(self, input_ids, token_type_ids, compute_loss=False):
        """
        Args:
            input_ids: a batch of input token ids, padded with [PAD]
            token_type_ids: marks whether each token belongs to sentence 1 or sentence 2
            compute_loss: if True, also compute the cross-entropy loss

        Usage:
            pretrained_model = 'hfl/chinese-bert-wwm'
            tokenizer = BertTokenizer.from_pretrained(pretrained_model)
            model = UniLMModel(pretrained_model, tokenizer.vocab_size)

            question = ['Xiao Ming has 3 apples and Xiao Hong has 3 times as many. How many apples does Xiao Hong have?',
                        'The perimeter of a square is 12. What is its side length?']
            equation = ['3*3', '12/4']
            tokenized = tokenizer(question, equation, return_tensors='pt', padding=True)
            input_ids = tokenized['input_ids']
            token_type_ids = tokenized['token_type_ids']

            # Call forward directly for inference
            logits = model(input_ids, token_type_ids, compute_loss=False)

            # For training
            optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
            loss, logits = model(input_ids, token_type_ids, compute_loss=True)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        """
        mask = calc_attention_mask(token_type_ids)
        encoder_output = self.bert(input_ids,
                                   token_type_ids=token_type_ids,
                                   attention_mask=mask)
        # With return_dict=True the BertModel returns a ModelOutput,
        # so take the hidden states explicitly.
        logits = self.decoder(encoder_output.last_hidden_state)
        if compute_loss:
            # Targets are the sentence-2 tokens; predictions are shifted left by one.
            labels = input_ids.view(-1)[token_type_ids.view(-1) == 1]
            logits = logits[:, :-1].contiguous()
            target_mask = token_type_ids[:, 1:].contiguous().view(-1)
            predictions = logits.view(-1, self.vocab_size)
            predictions = predictions[target_mask == 1]
            loss_function = nn.CrossEntropyLoss(ignore_index=0)
            loss = loss_function(predictions, labels)
            return loss, logits
        return logits

    def generate(self, text, beam_size=1):
        """Generate an equation from a question.

        Args:
            text: the question text
            beam_size: the top-k width used during beam search

        Note: tokenizer, device, max_question_length and max_equation_length
        are assumed to be module-level globals, as in the original code.
        """
        tokenized = tokenizer(text,
                              max_length=max_question_length,
                              truncation=True,
                              return_tensors='pt')
        token_ids = tokenized['input_ids'].to(device).view(1, -1)
        token_type_ids = tokenized['token_type_ids'].to(device).view(1, -1)
        output_ids = self.beam_search(token_ids,
                                      token_type_ids,
                                      beam_size=beam_size,
                                      max_length=max_equation_length)
        # Strip the trailing [SEP]
        output_ids = output_ids[:-1]
        decoding_text = tokenizer.decode(output_ids.cpu().numpy(),
                                         spaces_between_special_tokens=False)
        return decoding_text.replace(' ', '')

    def beam_search(self, token_ids, token_type_ids, max_length, beam_size=1):
        """Beam search over the decoder output.

        Here max_length is the maximum length of the generated sequence.
        """
        # Holds the generated sequences
        output_ids = torch.empty(1, 0, dtype=torch.long).to(device)
        with torch.no_grad():
            # Holds the accumulated scores
            output_scores = torch.zeros(token_ids.shape[0]).to(device)
            for step in range(max_length):
                if step == 0:
                    scores = self.forward(token_ids, token_type_ids)
                    # Repeat the inputs beam_size times
                    token_ids = token_ids.view(1, -1).repeat(beam_size, 1)
                    token_type_ids = token_type_ids.view(1, -1).repeat(beam_size, 1)
                else:
                    scores = self.forward(new_input_ids, new_token_type_ids)
                logit_score = torch.log_softmax(scores[:, -1], dim=-1)
                # Accumulate scores along each beam
                logit_score = output_scores.view(-1, 1) + logit_score
                # Flatten before taking the top k
                logit_score = logit_score.view(-1)
                hype_score, hype_pos = torch.topk(logit_score, beam_size)
                indice1 = hype_pos // scores.shape[-1]  # row index: which beam
                indice2 = (hype_pos % scores.shape[-1]).long().reshape(-1, 1)  # column index: which token
                # Update scores and sequences
                output_scores = hype_score
                output_ids = torch.cat([output_ids[indice1], indice2], dim=1).long()
                new_input_ids = torch.cat([token_ids, output_ids], dim=1)
                new_token_type_ids = torch.cat(
                    [token_type_ids, torch.ones_like(output_ids).to(device)], dim=1)
                # Count the end markers seen so far
                end_counts = (output_ids == tokenizer.sep_token_id).sum(1)
                best_one = output_scores.argmax()
                if end_counts[best_one] == 1:
                    # The best beam has terminated
                    return output_ids[best_one]
                else:
                    # Keep only the unfinished beams
                    flag = (end_counts < 1)  # marks unfinished sequences
                    if not flag.all():  # some beams have finished
                        token_ids = token_ids[flag]
                        token_type_ids = token_type_ids[flag]
                        new_input_ids = new_input_ids[flag]
                        new_token_type_ids = new_token_type_ids[flag]
                        output_ids = output_ids[flag]  # drop finished sequences
                        output_scores = output_scores[flag]  # drop finished scores
                        end_counts = end_counts[flag]  # drop finished end counts
                        beam_size = flag.sum()  # shrink the beam accordingly
            return output_ids[output_scores.argmax()]

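UniLMModel.forward calls calc_attention_mask, which is not defined in the snippet. Below is a minimal sketch of a UniLM-style seq2seq mask, assuming segment 0 holds the question and segment 1 the equation; the exact formulation is an assumption, and padding is ignored for brevity.

import torch

def calc_attention_mask(token_type_ids):
    # Sketch (assumed implementation): question tokens (segment 0) attend to
    # the whole question; equation tokens (segment 1) attend to the question
    # plus the equation tokens generated so far.
    seq_len = token_type_ids.size(1)
    causal = torch.tril(torch.ones(seq_len, seq_len,
                                   dtype=torch.float32,
                                   device=token_type_ids.device))
    s_key = token_type_ids[:, None, :].float()    # (batch, 1, seq): key segment
    s_query = token_type_ids[:, :, None].float()  # (batch, seq, 1): query segment
    # (batch, seq, seq): 1 where attention is allowed, 0 where it is blocked
    return (1.0 - s_key) * (1.0 - s_query) + s_query * causal
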
def __init__(self, config, v_feature_size):
    super(MMPreTrainingHeads, self).__init__()
    self.predictions = BertLMPredictionHead(config)
    self.bi_seq_relationship = nn.Linear(config.hidden_size, 2)
    self.imagePredictions = BertImagePredictionHead(config, v_feature_size)

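A forward sketch for the multimodal heads above (assumed, not from the original; argument names follow the ViLBERT convention of separate text and visual streams):

def forward(self, sequence_output_t, sequence_output_v, pooled_output):
    # Hypothetical forward pass: masked-LM scores over the text stream,
    # masked-region scores over the visual stream, and a binary
    # text-image alignment score from the pooled output.
    prediction_scores_t = self.predictions(sequence_output_t)
    prediction_scores_v = self.imagePredictions(sequence_output_v)
    seq_relationship_score = self.bi_seq_relationship(pooled_output)
    return prediction_scores_t, prediction_scores_v, seq_relationship_score
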
def add_lm_head(self):
    self.lm_head = BertLMPredictionHead(self.bert.config)
    # Tie the LM head to the input embeddings. nn.Linear stores its weight as
    # (out_features, in_features) = (vocab_size, hidden_size), which already
    # matches the embedding matrix, so no transpose is needed; assigning to a
    # nonexistent `.weights` attribute would silently leave the head untied.
    self.lm_head.decoder.weight = self.bert.get_input_embeddings().weight

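A quick sanity check that the tie shares storage (hypothetical; assumes a model instance on which add_lm_head has been called):

# Hypothetical check: tied parameters must point at the same storage.
assert (model.lm_head.decoder.weight.data_ptr()
        == model.bert.get_input_embeddings().weight.data_ptr())
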
def __init__(self, config):
    super(BertMLMPreTrainingHeads, self).__init__()
    self.predictions = BertLMPredictionHead(config)

def __init__(self, name, config):
    super().__init__(name)
    self.loss = CrossEntropyLoss(ignore_index=-1)
    self.vocab_size = config.vocab_size
    self.masked_lm_head = BertLMPredictionHead(config)

def __init__(self, config):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.span_predictions = QuestionAwareSpanSelectionHead(config)

def __init__(self, config):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.sentiment_classification = nn.Linear(config.hidden_size, 3)

def __init__(self, config):
    super(BertIMAwControlPreTrainingHeads, self).__init__()
    self.predictions = BertLMPredictionHead(config)
    self.adj_predictions = BertIMAPredictionHead(config)
    self.pos_tagging = BertTokenClassificationHead(config)

def __init__(self, config):
    super().__init__()
    self.predictions = BertLMPredictionHead(config)
    self.genderace_prediction = BertGendeRacePredictionHead(config)