def __init__(self, num_choices, bert_config_file, init_embeddings):
    self.num_choices = num_choices
    self.bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, self.bert_config)
    self.bert = BertModel(self.bert_config)
    self.init_weights()  # initialize the weight parameters
    self.dropout = nn.Dropout(self.bert_config.hidden_dropout_prob)
    # Word embedding matrix used for the knowledge representations
    self.vocab_size, self.embed_size = np.shape(init_embeddings)
    self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(init_embeddings), freeze=False)
    self.classifier = nn.Linear(self.embed_size + self.bert_config.hidden_size, 1)
    self.A = nn.Parameter(torch.Tensor(self.bert_config.hidden_size, self.embed_size))
    self.bias = nn.Parameter(torch.Tensor(1))
    # In BERT, [CLS] passes through the Transformer block's MLP and a final layer-norm,
    # then through the BertPooler layer, which applies an nn.Tanh activation.
    self.layer_norm = nn.LayerNorm(self.embed_size, eps=self.bert_config.layer_norm_eps)
    # self.know_activation = ACT2FN["gelu"]
    self.know_activation = nn.Tanh()
    self.activation = nn.Sigmoid()
    nn.init.xavier_normal_(self.A)
    self.bias.data.fill_(0)
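# A minimal sketch of how the bilinear parameters A and bias above might be
# used in a forward pass. The actual forward() is not shown in the source;
# the method name (_score_knowledge) and tensor names (cls_state, know_ids)
# are assumptions for illustration only.
def _score_knowledge(self, cls_state, know_ids):
    # cls_state: (batch, hidden_size) pooled [CLS] representation
    # know_ids:  (batch,) indices into the knowledge embedding matrix
    know = self.layer_norm(self.know_activation(self.embed(know_ids)))  # (batch, embed_size)
    # Bilinear match: (batch, hidden) @ (hidden, embed) -> (batch, embed)
    projected = cls_state @ self.A
    score = (projected * know).sum(dim=-1, keepdim=True) + self.bias  # (batch, 1)
    return self.activation(score)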
def __init__(self, config):
    # super(HeadlessBertForSequenceClassification, self).__init__(config)
    BertPreTrainedModel.__init__(self, config)
    HeadlessModelForSequenceClassification.__init__(self, config)
    self.bert = BertModel(config)
    self.init_weights()
def __init__(self, config):
    BertPreTrainedModel.__init__(self, config)
    HeadlessModelForSequenceClassification.__init__(self, config)
    # super(HeadlessRobertaForSequenceClassification, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.init_weights()
def evaluate(model: BertPreTrainedModel, iterator: DataLoader) -> float:
    model.eval()
    total = []
    # Iterate lazily instead of list(iterator): DataLoader defines __len__,
    # so tqdm still shows progress without materializing every batch first.
    for batch in tqdm(iterator, desc='eval'):
        with torch.no_grad():
            loss = model(**batch)[0]
        total += [loss.item()]
    model.train()
    return sum(total) / len(total)
def __init__(self, num_choices, bert_config_file):
    self.num_choices = num_choices
    bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, bert_config)
    self.bert = BertModel(bert_config)
    self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
    self.classifier = nn.Linear(bert_config.hidden_size, 1)
    self.activation = nn.Sigmoid()
    self.init_weights()
def __init__(self, config):
    # Call the init one parent class up.
    # Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    self.num_labels = config.num_labels
    # Replace `BertModel` with SparseBertModel.
    self.bert = bert_cls(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
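# Why "one parent class up": calling the immediate parent's __init__ (e.g.
# BertForSequenceClassification.__init__) would itself build a BertModel,
# which this subclass then replaces, allocating the encoder weights twice.
# Illustrative sketch of the problem (class names here are hypothetical):
#
#   class Bad(BertForSequenceClassification):
#       def __init__(self, config):
#           super().__init__(config)             # builds self.bert = BertModel(config)
#           self.bert = SparseBertModel(config)  # builds a second full encoder
#
# Jumping straight to BertPreTrainedModel.__init__, which only stores the
# config, avoids the duplicate allocation.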
def __init__(self, config, opts):
    BertPreTrainedModel.__init__(self, config)
    self.use_leaf_rnn = True
    self.intra_attention = False
    self.gumbel_temperature = 1
    self.bidirectional = True
    self.model_name = opts.model
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    assert not (self.bidirectional and not self.use_leaf_rnn)
    word_dim = config.hidden_size
    hidden_dim = config.hidden_size
    if self.use_leaf_rnn:
        self.leaf_rnn_cell = nn.LSTMCell(input_size=word_dim, hidden_size=hidden_dim)
        if self.bidirectional:
            self.leaf_rnn_cell_bw = nn.LSTMCell(input_size=word_dim, hidden_size=hidden_dim)
    else:
        self.word_linear = nn.Linear(in_features=word_dim, out_features=2 * hidden_dim)
    if self.bidirectional:
        self.treelstm_layer = BinaryTreeLSTMLayer(2 * hidden_dim)
        # self.comp_query = nn.Parameter(torch.FloatTensor(2 * hidden_dim))
        self.comp_query_linear = nn.Linear(hidden_dim * 2, 1, bias=False)
    else:
        self.treelstm_layer = BinaryTreeLSTMLayer(hidden_dim)
        # self.comp_query = nn.Parameter(torch.FloatTensor(hidden_dim))
        self.comp_query_linear = nn.Linear(hidden_dim, 1, bias=False)
    self.v_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)
    emb_hidden_size = config.hidden_size
    self.register_buffer('enlarged_candidates', torch.arange(opts.len_idiom_vocab))
    self.idiom_embedding_u = nn.Embedding(opts.len_idiom_vocab, emb_hidden_size)
    self.idiom_embedding_v = nn.Embedding(opts.len_idiom_vocab, emb_hidden_size)
    self.LayerNorm_u = nn.LayerNorm(emb_hidden_size, eps=config.layer_norm_eps)
    self.LayerNorm_v = nn.LayerNorm(emb_hidden_size, eps=config.layer_norm_eps)
    self.context_pool = AttentionPool(config.hidden_size, config.hidden_dropout_prob)
    self.init_weights()
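# A minimal sketch of how the idiom-candidate embeddings above might be used
# for ranking. The forward pass is not shown in the source; the method name
# (_rank_candidates) and the dot-product scoring scheme are assumptions for
# illustration only.
def _rank_candidates(self, blank_state):
    # blank_state: (batch, hidden_size) representation of the idiom blank
    candidates = self.enlarged_candidates  # (len_idiom_vocab,)
    emb_u = self.LayerNorm_u(self.idiom_embedding_u(candidates))  # (V, H)
    logits = blank_state @ emb_u.t()  # (batch, V) similarity to every idiom
    return logits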
def __init__(self, config):
    # Both parents share the same config; `self` must be passed explicitly
    # when parent initializers are called by class name.
    BertPreTrainedModel.__init__(self, config)
    XLMPreTrainedModel.__init__(self, config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.transformer = XLMModel(config)
    self.dropout = nn.Dropout(0.1)
    # Classify over the concatenated BERT and XLM hidden states.
    self.classifier = nn.Linear(config.hidden_size * 2, config.num_labels)
    self.init_weights()
def __init__(self, config, add_pooling_layer=True):
    # Call the init one parent class up. Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    self.config = config
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config) if add_pooling_layer else None
    # Sparsify linear modules.
    self.sparsify_model()
    self.init_weights()
def __init__(self, config):
    # Call the init one parent class up.
    # Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)
    if config.is_decoder:
        logging.warning(
            # This warning was included with the original BertForMaskedLM.
            f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
            " `config.is_decoder=False` for bi-directional self-attention."
        )
    self.bert = bert_cls(config, add_pooling_layer=False)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
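# `bert_cls` and `name_prefix` are free variables above, so this __init__ is
# presumably defined inside a class factory. A minimal sketch of such a
# factory, assuming the surrounding code; the function and class names here
# (make_mlm_class, _GeneratedBertForMaskedLM) are hypothetical:
def make_mlm_class(bert_cls, name_prefix):
    class _GeneratedBertForMaskedLM(BertPreTrainedModel):
        def __init__(self, config):
            BertPreTrainedModel.__init__(self, config)
            self.bert = bert_cls(config, add_pooling_layer=False)
            self.cls = BertOnlyMLMHead(config)
            self.init_weights()

    _GeneratedBertForMaskedLM.__name__ = f"{name_prefix}BertForMaskedLM"
    return _GeneratedBertForMaskedLM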
def create_model(model_class: BertPreTrainedModel, encoder_config: BertConfig, tokenizer: BertTokenizer,
                 encoder_path=None, entity_types: dict = None, relation_types: dict = None,
                 prop_drop: float = 0.1, meta_embedding_size: int = 25,
                 size_embeddings_count: int = 10, ed_embeddings_count: int = 300,
                 token_dist_embeddings_count: int = 700, sentence_dist_embeddings_count: int = 50,
                 mention_threshold: float = 0.5, coref_threshold: float = 0.5, rel_threshold: float = 0.5,
                 position_embeddings_count: int = 700, cache_path=None):
    params = dict(
        config=encoder_config,
        # JEREX model parameters
        cls_token=tokenizer.convert_tokens_to_ids('[CLS]'),
        entity_types=len(entity_types),
        relation_types=len(relation_types),
        prop_drop=prop_drop,
        meta_embedding_size=meta_embedding_size,
        size_embeddings_count=size_embeddings_count,
        ed_embeddings_count=ed_embeddings_count,
        token_dist_embeddings_count=token_dist_embeddings_count,
        sentence_dist_embeddings_count=sentence_dist_embeddings_count,
        mention_threshold=mention_threshold,
        coref_threshold=coref_threshold,
        rel_threshold=rel_threshold,
        tokenizer=tokenizer,
        cache_dir=cache_path,
    )

    if encoder_path is not None:
        model = model_class.from_pretrained(encoder_path, **params)
    else:
        model = model_class(**params)

    # Conditionally enlarge the position embedding table: copy the pretrained
    # rows and leave the newly added positions randomly initialized.
    if encoder_config.max_position_embeddings < position_embeddings_count:
        old = model.bert.embeddings.position_embeddings
        new = nn.Embedding(position_embeddings_count, encoder_config.hidden_size)
        new.weight.data[:encoder_config.max_position_embeddings, :] = old.weight.data
        model.bert.embeddings.position_embeddings = new
        model.bert.embeddings.register_buffer(
            "position_ids", torch.arange(position_embeddings_count).expand((1, -1)))
        encoder_config.max_position_embeddings = position_embeddings_count

    return model
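# Hedged usage sketch for create_model. The model class, checkpoint name, and
# type dictionaries below (JointMultiInstanceModel, bert-base-cased, the
# PER/ORG maps) are illustrative stand-ins, not taken from the source.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
encoder_config = BertConfig.from_pretrained('bert-base-cased')
model = create_model(
    model_class=JointMultiInstanceModel,  # hypothetical JEREX model class
    encoder_config=encoder_config,
    tokenizer=tokenizer,
    encoder_path='bert-base-cased',
    entity_types={'PER': 0, 'ORG': 1},    # illustrative type maps
    relation_types={'works_for': 0},
    position_embeddings_count=700,        # enlarges BERT's default 512 positions
)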
def train_epoch(model: BertPreTrainedModel, optimizer: torch.optim.Optimizer, iterator: DataLoader,
                args: TrainingArguments, num_epoch=0):
    model.train()
    train_loss = 0
    for step, batch in enumerate(tqdm(iterator, desc="train")):
        loss = model(**batch)[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
        optimizer.step()
        model.zero_grad()
        train_loss += loss.item()
        if args.writer:
            args.writer.add_scalar('Loss/train', loss.item(), num_epoch * len(iterator) + step)
        if step > 0 and step % args.save_steps == 0:
            model.save_pretrained(args.output_dir)
            logger.info(f"epoch: {num_epoch + step / len(iterator)}")
            logger.info(f"train loss: {train_loss / args.save_steps}")
            train_loss = 0
    model.save_pretrained(args.output_dir)
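# Hedged sketch of a driver loop tying train_epoch and evaluate together.
# The loader variables, learning rate, and epoch count are assumptions; only
# train_epoch/evaluate and their signatures come from the code above.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
for epoch in range(3):
    train_epoch(model, optimizer, train_loader, args, num_epoch=epoch)
    val_loss = evaluate(model, val_loader)
    logger.info(f"epoch {epoch}: val loss {val_loss:.4f}")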