Example #1
    def __init__(self, num_choices, bert_config_file, init_embeddings):
        self.num_choices = num_choices
        self.bert_config = BertConfig.from_json_file(bert_config_file)
        BertPreTrainedModel.__init__(self, self.bert_config)

        self.bert = BertModel(self.bert_config)
        self.init_weights()    # initialize the weight parameters
        self.dropout = nn.Dropout(self.bert_config.hidden_dropout_prob)

        # word-embedding matrix for the knowledge representations
        self.vocab_size, self.embed_size = np.shape(init_embeddings)
        self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(init_embeddings), freeze=False)

        #self.classifier = nn.Linear(self.bert_config.hidden_size + self.embed_size, 1)
        self.classifier = nn.Linear(self.embed_size + self.bert_config.hidden_size, 1)
        self.A = nn.Parameter(torch.Tensor(self.bert_config.hidden_size, self.embed_size))
        self.bias = nn.Parameter(torch.Tensor(1))

        # In BERT, the [CLS] vector first goes through the Transformer layer's MLP and a final layer norm,
        # and is then passed through the BertPooler layer, which applies an nn.Tanh activation.
        self.layer_norm = nn.LayerNorm(self.embed_size, eps=self.bert_config.layer_norm_eps)
        # self.know_activation = ACT2FN["gelu"]
        self.know_activation = nn.Tanh()

        self.activation = nn.Sigmoid()

        nn.init.xavier_normal_(self.A)
        self.bias.data.fill_(0)
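
The comment about the [CLS] vector above refers to how the BertPooler in transformers produces the pooled output. As a rough sketch (not part of the snippet itself; the class name Pooler is just illustrative), it is a dense layer followed by tanh applied to the first token's hidden state:

import torch.nn as nn


class Pooler(nn.Module):
    """Rough equivalent of transformers' BertPooler: dense layer plus tanh over the [CLS] position."""

    def __init__(self, hidden_size):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # hidden_states: (batch, seq_len, hidden); the first token is [CLS]
        first_token = hidden_states[:, 0]
        return self.activation(self.dense(first_token))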
Example #2
    def __init__(self, config):
        # super(HeadlessBertForSequenceClassification, self).__init__(config)
        BertPreTrainedModel.__init__(self, config)
        HeadlessModelForSequenceClassification.__init__(self, config)

        self.bert = BertModel(config)

        self.init_weights()
Example #3
    def __init__(self, config):
        BertPreTrainedModel.__init__(self, config)
        HeadlessModelForSequenceClassification.__init__(self, config)
        # super(HeadlessRobertaForSequenceClassification, self).__init__(config)

        self.roberta = RobertaModel(config)

        self.init_weights()
Example #4
def evaluate(model: BertPreTrainedModel, iterator: DataLoader) -> float:
    model.eval()
    total = []
    for batch in tqdm(list(iterator), desc='eval'):
        with torch.no_grad():
            loss = model(**batch)[0]
        total += [loss.item()]
    model.train()
    return sum(total) / len(total)
Example #5
    def __init__(self, num_choices, bert_config_file):
        self.num_choices = num_choices
        bert_config = BertConfig.from_json_file(bert_config_file)
        BertPreTrainedModel.__init__(self, bert_config)
        self.bert = BertModel(bert_config)
        self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
        self.classifier = nn.Linear(bert_config.hidden_size, 1)
        self.activation = nn.Sigmoid()
        self.init_weights()
Example #6
        def __init__(self, config):
            # Call the init one parent class up.
            # Otherwise, the model will be defined twice.
            BertPreTrainedModel.__init__(self, config)

            self.num_labels = config.num_labels

            # Replace `BertModel` with SparseBertModel.
            self.bert = bert_cls(config)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            self.classifier = nn.Linear(config.hidden_size, config.num_labels)

            self.init_weights()
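
The "call the init one parent class up" comment in Example #6 is the pattern behind most of these snippets: the subclass bypasses its immediate parent's constructor (which would already build an encoder and a head) and calls BertPreTrainedModel.__init__ directly, so each submodule is constructed exactly once. A minimal sketch of that idea, assuming only the stock transformers classes (the class name MyBertForSequenceClassification is made up for illustration):

import torch.nn as nn
from transformers import BertModel, BertPreTrainedModel
from transformers import BertForSequenceClassification


class MyBertForSequenceClassification(BertForSequenceClassification):
    def __init__(self, config):
        # Skip BertForSequenceClassification.__init__ and call the grandparent,
        # so the encoder assigned below is the only one that gets built.
        BertPreTrainedModel.__init__(self, config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)  # swap in a custom encoder here if needed
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()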
Example #7
    def __init__(self, config, opts):
        BertPreTrainedModel.__init__(self, config)
        self.use_leaf_rnn = True
        self.intra_attention = False
        self.gumbel_temperature = 1
        self.bidirectional = True

        self.model_name = opts.model
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        assert not (self.bidirectional and not self.use_leaf_rnn)

        word_dim = config.hidden_size
        hidden_dim = config.hidden_size
        if self.use_leaf_rnn:
            self.leaf_rnn_cell = nn.LSTMCell(input_size=word_dim,
                                             hidden_size=hidden_dim)
            if self.bidirectional:
                self.leaf_rnn_cell_bw = nn.LSTMCell(input_size=word_dim,
                                                    hidden_size=hidden_dim)
        else:
            self.word_linear = nn.Linear(in_features=word_dim,
                                         out_features=2 * hidden_dim)
        if self.bidirectional:
            self.treelstm_layer = BinaryTreeLSTMLayer(2 * hidden_dim)
            # self.comp_query = nn.Parameter(torch.FloatTensor(2 * hidden_dim))
            self.comp_query_linear = nn.Linear(hidden_dim * 2, 1, bias=False)
        else:
            self.treelstm_layer = BinaryTreeLSTMLayer(hidden_dim)
            # self.comp_query = nn.Parameter(torch.FloatTensor(hidden_dim))
            self.comp_query_linear = nn.Linear(hidden_dim, 1, bias=False)

        self.v_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)

        emb_hidden_size = config.hidden_size
        self.register_buffer('enlarged_candidates',
                             torch.arange(opts.len_idiom_vocab))
        self.idiom_embedding_u = nn.Embedding(opts.len_idiom_vocab,
                                              emb_hidden_size)
        self.idiom_embedding_v = nn.Embedding(opts.len_idiom_vocab,
                                              emb_hidden_size)
        self.LayerNorm_u = nn.LayerNorm(emb_hidden_size,
                                        eps=config.layer_norm_eps)
        self.LayerNorm_v = nn.LayerNorm(emb_hidden_size,
                                        eps=config.layer_norm_eps)

        self.context_pool = AttentionPool(config.hidden_size,
                                          config.hidden_dropout_prob)

        self.init_weights()
Example #8
    def __init__(self, config):
        BertPreTrainedModel.__init__(self, config)
        XLMPreTrainedModel.__init__(self, config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.transformer = XLMModel(config)
        self.dropout = nn.Dropout(0.1)
        # classifier over the concatenated BERT and XLM hidden states
        self.classifier = nn.Linear(config.hidden_size * 2, config.num_labels)

        self.init_weights()
Example #9
    def __init__(self, config, add_pooling_layer=True):
        # Call the init one parent class up. Otherwise, the model will be defined twice.
        BertPreTrainedModel.__init__(self, config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        self.pooler = BertPooler(config) if add_pooling_layer else None

        # Sparsify linear modules.
        self.sparsify_model()

        self.init_weights()
Example #10
        def __init__(self, config):

            # Call the init one parent class up.
            # Otherwise, the model will be defined twice.
            BertPreTrainedModel.__init__(self, config)

            if config.is_decoder:
                logging.warning(
                    # This warning was included with the original BertForMaskedLM.
                    f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
                    " `config.is_decoder=False` for bi-directional self-attention."
                )

            self.bert = bert_cls(config, add_pooling_layer=False)
            self.cls = BertOnlyMLMHead(config)

            self.init_weights()
Example #11
def create_model(model_class: BertPreTrainedModel,
                 encoder_config: BertConfig,
                 tokenizer: BertTokenizer,
                 encoder_path=None,
                 entity_types: dict = None,
                 relation_types: dict = None,
                 prop_drop: float = 0.1,
                 meta_embedding_size: int = 25,
                 size_embeddings_count: int = 10,
                 ed_embeddings_count: int = 300,
                 token_dist_embeddings_count: int = 700,
                 sentence_dist_embeddings_count: int = 50,
                 mention_threshold: float = 0.5,
                 coref_threshold: float = 0.5,
                 rel_threshold: float = 0.5,
                 position_embeddings_count: int = 700,
                 cache_path=None):
    params = dict(
        config=encoder_config,
        # JEREX model parameters
        cls_token=tokenizer.convert_tokens_to_ids('[CLS]'),
        entity_types=len(entity_types),
        relation_types=len(relation_types),
        prop_drop=prop_drop,
        meta_embedding_size=meta_embedding_size,
        size_embeddings_count=size_embeddings_count,
        ed_embeddings_count=ed_embeddings_count,
        token_dist_embeddings_count=token_dist_embeddings_count,
        sentence_dist_embeddings_count=sentence_dist_embeddings_count,
        mention_threshold=mention_threshold,
        coref_threshold=coref_threshold,
        rel_threshold=rel_threshold,
        tokenizer=tokenizer,
        cache_dir=cache_path,
    )

    if encoder_path is not None:
        model = model_class.from_pretrained(encoder_path, **params)
    else:
        model = model_class(**params)

    # conditionally increase position embedding count
    if encoder_config.max_position_embeddings < position_embeddings_count:
        old = model.bert.embeddings.position_embeddings

        new = nn.Embedding(position_embeddings_count,
                           encoder_config.hidden_size)
        new.weight.data[:encoder_config.max_position_embeddings, :] = old.weight.data
        model.bert.embeddings.position_embeddings = new
        model.bert.embeddings.register_buffer(
            "position_ids",
            torch.arange(position_embeddings_count).expand((1, -1)))

        encoder_config.max_position_embeddings = position_embeddings_count

    return model
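
The position-embedding enlargement at the end of Example #11 also works on a plain encoder. A minimal sketch under the assumption of a stock BertModel checkpoint (the checkpoint name and target length below are only examples):

import torch
import torch.nn as nn
from transformers import BertModel

model = BertModel.from_pretrained('bert-base-uncased')
config = model.config
new_count = 1024  # target maximum sequence length

if config.max_position_embeddings < new_count:
    old = model.embeddings.position_embeddings
    new = nn.Embedding(new_count, config.hidden_size)
    # copy the pretrained weights for the original positions;
    # the extra positions keep their fresh random initialization
    new.weight.data[:config.max_position_embeddings, :] = old.weight.data
    model.embeddings.position_embeddings = new
    model.embeddings.register_buffer(
        "position_ids", torch.arange(new_count).expand((1, -1)))
    config.max_position_embeddings = new_count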
Example #12
def train_epoch(model: BertPreTrainedModel, optimizer: torch.optim.Optimizer, iterator: DataLoader,
                args: TrainingArguments, num_epoch=0):
    model.train()
    train_loss = 0
    for step, batch in enumerate(tqdm(iterator, desc="train")):
        loss = model(**batch)[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
        optimizer.step()
        model.zero_grad()
        train_loss += loss.item()
        if args.writer:
            args.writer.add_scalar('Loss/train', loss.item(), num_epoch * len(iterator) + step)
        if step > 0 and step % args.save_steps == 0:
            model.save_pretrained(args.output_dir)
            logger.info(f"epoch: {num_epoch + step / len(iterator)}")
            logger.info(f"train loss: {train_loss / args.save_steps}")
            train_loss = 0
    model.save_pretrained(args.output_dir)
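
For context, a hypothetical outer loop that wires train_epoch together with the evaluate helper from Example #4; model, optimizer, the two DataLoaders and args are assumed to be constructed elsewhere with the types shown in those signatures:

# Hypothetical wiring of train_epoch (Example #12) and evaluate (Example #4);
# train_loader and eval_loader are assumed DataLoader instances.
for epoch in range(int(args.num_train_epochs)):
    train_epoch(model, optimizer, train_loader, args, num_epoch=epoch)
    eval_loss = evaluate(model, eval_loader)
    logger.info(f"epoch {epoch}: eval loss {eval_loss:.4f}")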