Example #1
 def __init__(self, layer_num, head_num, head_size, weights=None):
     super().__init__()
     hidden_dim = head_num * head_size
     conf = BertConfig(hidden_size=hidden_dim, intermediate_size=4*hidden_dim, num_attention_heads=head_num, num_hidden_layers=layer_num)
     self.encoder = BertEncoder(conf)
     if isinstance(weights, dict):
         w = {}
         for k, v in weights.items():
             if k.startswith('bert.encoder'):
                 # strip the 13-character 'bert.encoder.' prefix so keys match BertEncoder's state_dict
                 w[k[13:]] = weights[k]
         self.encoder.load_state_dict(w)
     else:
         for i in range(layer_num):
             self.encoder.layer[i].attention.self.query.weight.data = weights.w[i][0].transpose(-1, -2).contiguous()
             self.encoder.layer[i].attention.self.query.bias.data = weights.w[i][1]
             self.encoder.layer[i].attention.self.key.weight.data = weights.w[i][2].transpose(-1, -2).contiguous()
             self.encoder.layer[i].attention.self.key.bias.data = weights.w[i][3]
             self.encoder.layer[i].attention.self.value.weight.data = weights.w[i][4].transpose(-1, -2).contiguous()
             self.encoder.layer[i].attention.self.value.bias.data = weights.w[i][5]
             self.encoder.layer[i].attention.output.dense.weight.data = weights.w[i][6].transpose(-1, -2).contiguous()
             self.encoder.layer[i].attention.output.dense.bias.data = weights.w[i][7]
             self.encoder.layer[i].attention.output.LayerNorm.weight.data = weights.w[i][8]
             self.encoder.layer[i].attention.output.LayerNorm.bias.data = weights.w[i][9]
             self.encoder.layer[i].intermediate.dense.weight.data = weights.w[i][10].transpose(-1, -2).contiguous()
             self.encoder.layer[i].intermediate.dense.bias.data = weights.w[i][11]
             self.encoder.layer[i].output.dense.weight.data = weights.w[i][12].transpose(-1, -2).contiguous()
             self.encoder.layer[i].output.dense.bias.data = weights.w[i][13]
             self.encoder.layer[i].output.LayerNorm.weight.data = weights.w[i][14]
             self.encoder.layer[i].output.LayerNorm.bias.data = weights.w[i][15]
     self.head_mask = [None] * layer_num
Example #2
    def init_data(self, use_cuda) -> None:
        test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')
        if not use_cuda:
            torch.set_num_threads(1)

        torch.set_grad_enabled(False)
        self.cfg = BertConfig()

        self.torch_encoder_layer = BertEncoder(self.cfg)
        self.torch_encoder_layer.eval()

        if use_cuda:
            self.torch_encoder_layer.to(test_device)

        self.batch_size = 1
        self.seq_length = 40
        self.hidden_size = self.cfg.hidden_size
        self.input_tensor = torch.rand(size=(self.batch_size, self.seq_length,
                                             self.hidden_size),
                                       dtype=torch.float32,
                                       device=test_device)

        self.attention_mask = torch.ones((self.batch_size, self.seq_length),
                                         dtype=torch.float32,
                                         device=test_device)
        self.attention_mask = self.attention_mask[:, None, None, :]
        self.attention_mask = (1.0 - self.attention_mask) * -10000.0

        self.turbo_bert_encoder = turbo_transformers.BertEncoder.from_torch(
            self.torch_encoder_layer)
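The fixture above builds the standard BERT-style additive attention mask: positions to keep contribute 0 to the attention scores, masked positions contribute -10000, so their softmax weights collapse to roughly zero. A minimal, self-contained sketch of that convention (illustrative only, not taken from any of the projects listed here):

import torch

mask = torch.tensor([[1., 1., 1., 0.]])              # 1 = real token, 0 = padding
additive_mask = (1.0 - mask)[:, None, None, :] * -10000.0

scores = torch.randn(1, 1, 4, 4)                      # (batch, heads, query, key) attention scores
weights = torch.softmax(scores + additive_mask, dim=-1)
print(weights[0, 0, 0])                               # last entry is ~0: the padded key receives no attention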
Example #3
 def __init__(self, layer_num, head_num, head_size, weights=None):
     super().__init__()
     hidden_dim = head_num * head_size
     conf = BertConfig(hidden_size=hidden_dim,
                       intermediate_size=4 * hidden_dim,
                       num_attention_heads=head_num,
                       num_hidden_layers=layer_num)
     self.encoder = BertEncoder(conf)
     w = {}
     for k, v in weights.weights.items():
         if k.startswith('bert.encoder') and not k.endswith('_amax'):
             w[k[13:]] = weights.weights[k]
     self.encoder.load_state_dict(w)
     self.head_mask = [None] * layer_num
Example #4
    def build(self):
        self.text_processor = registry.get(self._datasets[0] +
                                           "_text_processor")
        self.vocab = self.text_processor.vocab
        self.word_embedding = self.vocab.get_embedding(
            torch.nn.Embedding,
            freeze=False,
            embedding_dim=self.config.text_embedding.embedding_dim)
        self.segment_embeddings = nn.Embedding(self.config.num_segment_type,
                                               self.config.hidden_size)

        self.cls_project = nn.Linear(self.config.text_embedding.embedding_dim,
                                     self.config.hidden_size)
        self.lstm = nn.LSTM(**self.config.lstm)
        self.lstm_proj = nn.Linear(self.config.hidden_size * 2,
                                   self.config.hidden_size)
        self.img_encoder = ImageClevrEncoder(self.config)
        self.img_pos_emb = nn.Linear(2, self.config.hidden_size)

        self.LayerNorm = nn.LayerNorm(self.config.hidden_size,
                                      eps=self.config.layer_norm_eps)
        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)

        self.bert_config = BertConfig.from_dict(
            OmegaConf.to_container(self.config, resolve=True))
        self.transformer = BertEncoder(self.bert_config)
        self.pooler = BertPooler(self.bert_config)

        self.classifier = nn.Sequential(
            BertPredictionHeadTransform(self.config),
            nn.Linear(self.config.hidden_size, self.config.num_labels),
        )

        self.head_mask = [None for _ in range(self.config.num_hidden_layers)]
Example #5
    def __init__(self, config, num_tune_layer):
        super().__init__(config)
        self.num_labels = config.num_labels
        config.num_tune_layer = num_tune_layer # Layer number to start fine-tuning from

        self.bert = BertModel(config)
        self.freeze_bert_layers(self.bert, num_tune_layer)
        self.dropout = nn.Dropout(config.classifier_dropout_prob)

        self.word_attn_linear = nn.Linear(config.hidden_size, config.hidden_size)
        self.word_attn_vector = nn.Parameter(norm_weight(config.hidden_size, None))
        
        self.sent_attn_linear_nocontext = nn.Linear(config.hidden_size, config.hidden_size)
        self.sent_attn_vector_nocontext = nn.Parameter(norm_weight(config.hidden_size, None))
        
        self.sent_attn_linear_context = nn.Linear(config.hidden_size, config.hidden_size)
        self.sent_attn_vector_context = nn.Parameter(norm_weight(config.hidden_size, None))

        self.classifier_esm = nn.Linear(config.hidden_size, self.config.num_labels)
        self.classifier_agg = nn.Linear(config.hidden_size, self.config.num_labels)
        
        self.config_custom = copy.deepcopy(config)
        self.config_custom.num_hidden_layers = 2
        self.bert_encoder_custom = BertEncoder(self.config_custom)
        
        self.beta1 = nn.Parameter(torch.Tensor([0.5]))
        self.beta2 = nn.Parameter(torch.Tensor([0.5]))

        self.init_weights()
Example #6
    def __init__(self, config):
        super(BertModelDialog, self).__init__(config)

        self.embeddings = BertEmbeddingsDialog(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.init_weights()
Example #7
    def __init__(self, config):
        super(BertModel, self).__init__(config)

        self.embeddings = SuperPositionalBertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.init_weights()
Example #8
    def __init__(
        self,
        config,
        visual_embedding_dim=512,
        embedding_strategy="plain",
        bypass_transformer=False,
        output_attentions=False,
        output_hidden_states=False,
    ):
        super().__init__(config)
        self.config = config

        config.visual_embedding_dim = visual_embedding_dim
        config.embedding_strategy = embedding_strategy
        config.bypass_transformer = bypass_transformer
        config.output_attentions = output_attentions
        config.output_hidden_states = output_hidden_states

        self.embeddings = BertVisioLinguisticEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.bypass_transformer = config.bypass_transformer

        if self.bypass_transformer:
            self.additional_layer = BertLayer(config)

        self.output_attentions = self.config.output_attentions
        self.output_hidden_states = self.config.output_hidden_states
        self.fixed_head_masks = [None for _ in range(len(self.encoder.layer))]
        self.init_weights()
Example #9
    def __init__(self, config):
        super().__init__(config)

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        # self.apply(self.init_weights)  # old versions of pytorch_transformers
        self.init_weights()
Example #10
    def __init__(self, cfg):
        super(TransfomerModel, self).__init__()
        self.cfg = cfg
        cont_col_size = len(cfg.cont_cols)
        self.cont_emb = nn.Sequential(
            nn.Linear(cont_col_size, cfg.hidden_size),
            nn.LayerNorm(cfg.hidden_size),
        )
        self.position_emb = nn.Embedding(num_embeddings=self.cfg.seq_len, embedding_dim=cfg.hidden_size)
        self.ln = nn.LayerNorm(cfg.hidden_size)
        self.config = BertConfig(
            3,  # not used
            hidden_size=cfg.hidden_size,
            num_hidden_layers=cfg.nlayers,
            num_attention_heads=cfg.nheads,
            intermediate_size=cfg.hidden_size,
            hidden_dropout_prob=cfg.dropout,
            attention_probs_dropout_prob=cfg.dropout,
        )
        self.encoder = BertEncoder(self.config)

        def get_reg():
            return nn.Sequential(
                nn.Linear(cfg.hidden_size, cfg.hidden_size),
                nn.LayerNorm(cfg.hidden_size),
                nn.Dropout(cfg.dropout),
                nn.ReLU(),
                nn.Linear(cfg.hidden_size, cfg.hidden_size),
                nn.LayerNorm(cfg.hidden_size),
                nn.Dropout(cfg.dropout),
                nn.ReLU(),
                nn.Linear(cfg.hidden_size, cfg.target_size),
            )

        self.reg_layer = get_reg()
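The snippet stops at the constructor. Purely as a hypothetical sketch (not this project's actual forward()), the pieces could be wired like this: embed the continuous columns, add position embeddings, run the BertEncoder with an additive mask, and feed the sequence output to the regression head (assumes `import torch`; argument names and shapes are assumptions):

    def forward(self, cont_feats, mask):
        # Hypothetical wiring: cont_feats is (batch, seq_len, cont_col_size), mask is (batch, seq_len) with 1 for valid steps.
        positions = torch.arange(cont_feats.size(1), device=cont_feats.device).unsqueeze(0)
        x = self.ln(self.cont_emb(cont_feats) + self.position_emb(positions))
        extended_mask = (1.0 - mask[:, None, None, :].float()) * -10000.0
        hidden = self.encoder(x, attention_mask=extended_mask)[0]   # sequence output
        return self.reg_layer(hidden)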
Example #11
 def __init__(self, cfg, args, tok):
     super().__init__(cfg)
     self.embeddings = VideoTransformerEmbedder(cfg, args, tok)
     self.encoder = BertEncoder(cfg)
     self.init_weights()
     self.args = args
     self.tok = tok
     self.cfg = cfg
Example #12
 def __init__(self, config):
     super().__init__(config)
     self.embeddings = ElectraEmbeddings(config)
     self.embeddings_project = nn.Linear(config.embedding_size,
                                         config.hidden_size)
     self.encoder = BertEncoder(config)
     self.config = config
     self.init_weights()
Example #13
    def __init__(self, config):
        super(LayoutLMModel, self).__init__(config)

        self.embeddings = LayoutLMEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #14
    def __init__(self, **kwargs):
        config = BertConfig(**kwargs)
        super().__init__(config)

        self.prev_pred_embeddings = PrevPredEmbeddings(config)

        self.encoder = BertEncoder(config)
        self.init_weights()
Example #15
    def __init__(self, config):
        super().__init__(config)
        # self.config holds the configuration hyperparameters of the BERT model used in the spelling-correction network (Correction_Network).
        self.config = config
        '''Part 1: build the layers needed by the error-detection network (Detection_Network)'''

        # A Bi-GRU serves as the encoder of the error-detection network (Detection_Network).
        # Since every sub-embedding module in BertModel's embeddings layer has an embedding dimension of 768, the Bi-GRU's
        # input_size is also 768. Its hidden_size is set to 256 so that, after the forward and backward hidden states are
        # concatenated, the combined hidden dimension stays at 512, i.e. enc_hid_size is 512.
        self.enc_bi_gru = torch.nn.GRU(input_size=768,
                                       hidden_size=256,
                                       dropout=0.2,
                                       bidirectional=True)

        # The bidirectional GRU encodes the input_embeddings fed into the error-detection network (Detection_Network).
        # Its output has shape (seq_len, batch_size, enc_hid_size * 2) and is transposed to (batch_size, seq_len, enc_hid_size * 2).
        # That reshaped output is passed through self.detection_network_dense_out, which maps it to a tensor of shape
        # (batch_size, seq_len, 2), making it easy to compute the cross-entropy loss for the binary task of deciding
        # whether each character in the sequence is misspelled.
        self.detection_network_dense_out = torch.nn.Linear(512, 2)

        # The same reshaped (batch_size, seq_len, enc_hid_size * 2) tensor is also fed into the soft_masking_coef_mapping
        # layer, which maps it to shape (batch_size, seq_len, 1). That tensor later goes through a Sigmoid() activation so
        # its values fall in (0, 1); it then serves as the coefficient p multiplied with mask_embeddings when computing the
        # soft-masked embeddings (p_i is the likelihood that the character at position i of the sequence is misspelled).
        self.soft_masking_coef_mapping = torch.nn.Linear(512, 1)
        '''Part 2: build the three kinds of layers used by BertModel inside the spelling-correction network (Correction_Network)'''
        '''
        (1) The embedding layer BertEmbeddings(), which combines the word, segment and position embeddings of each character.
        (2) The core of BERT: the multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
        (3) BERT's final pooling layer BertPooler.
        '''
        # Embedding layer BertEmbeddings().
        self.embeddings = BertEmbeddings(config)
        # Multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
        self.encoder = BertEncoder(config)
        # Pooling layer BertPooler.
        self.pooler = BertPooler(config)
        # Initialize weight matrices, biases, etc.
        self.init_weights()
        '''Fetch the embedding vector of the special [MASK] token from the word_embeddings layer inside BERT's BertEmbeddings() layer.'''
        # In the vocabulary of BERT's tokenizer class BertTokenizer(), the special [MASK] token is encoded as index 103
        # (for any BertTokenizer(), whichever pretrained BERT vocabulary from_pretrained loads, [MASK] maps to index 103;
        # the index only changes if a different pretrained model family, e.g. ALBERT, is used).
        # Later, the [MASK] tensor self.mask_embeddings must be broadcast to the same shape as the input_embeddings tensor
        # produced by BERT's BertEmbeddings() layer, i.e. (batch_size, seq_len, embed_size) -> (batch_size, seq_len, 768).
        self.mask_embeddings = self.embeddings.word_embeddings.weight[
            103]  # At this point the mask embedding tensor has shape (768,)

        # Note: after the soft_masked_embeddings pass through the BERT model of the correction network, and before that
        # result enters the final output layer and Softmax, the correction network's output is added via a residual
        # connection to the original input embeddings; only that sum is fed into the final output layer and Softmax to
        # predict the correct characters.
        '''self.soft_masked_bert_dense_out is the output layer that follows the correction network; it projects the
           residual-connection output from dimension 768 onto the index space of the correction vocabulary. (Its output
           can be regarded as the final output of the Soft_Masked_BERT model.)'''
        self.soft_masked_bert_dense_out = torch.nn.Linear(
            self.config.hidden_size,
            self.embeddings.word_embeddings.weight.shape[0])
        '''The final Softmax() can be omitted here, because if CrossEntropyLoss() is later used to compute the training loss, CrossEntropyLoss() already applies a log-softmax internally.'''
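The comments above describe how the detection network's coefficient p blends the [MASK] embedding with the input embeddings (the soft-masking step of Soft-Masked BERT). A self-contained sketch of that blend, with random tensors standing in for the real activations; the shapes follow the comments, but this is not the project's actual forward code:

import torch

batch_size, seq_len, embed_size, enc_hid_size = 2, 8, 768, 512
input_embeddings = torch.randn(batch_size, seq_len, embed_size)   # stand-in for the BertEmbeddings() output
gru_out = torch.randn(batch_size, seq_len, enc_hid_size)          # stand-in for the reshaped Bi-GRU output
mask_embedding = torch.randn(embed_size)                          # stand-in for word_embeddings.weight[103]

soft_masking_coef_mapping = torch.nn.Linear(enc_hid_size, 1)
p = torch.sigmoid(soft_masking_coef_mapping(gru_out))             # (batch_size, seq_len, 1): per-character error likelihood
soft_masked_embeddings = p * mask_embedding.view(1, 1, -1) + (1.0 - p) * input_embeddings
print(soft_masked_embeddings.shape)                               # torch.Size([2, 8, 768])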
Example #16
 def _create_span_encoder(self, kb, span_encoder_config):
     # check if encoder should be used
     if span_encoder_config is None:
         # return identity function as encoder
         return lambda t, m, h: t
     # update values to match dimensions
     span_encoder_config.hidden_size = kb.embedd_dim
     # create config and encoder
     return BertEncoder(span_encoder_config)
Example #17
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddingsWithWordMasking(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #18
File: base.py  Project: xuihau/G2GTr
    def __init__(self, config):
        super(BertBaseModel, self).__init__(config)
        self.config = config

        self.embeddings = BertBaseEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #19
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertCharacterEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
Example #20
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()
        self.use_ext_encoder = False
Example #21
    def __init__(self, config, v_dim, l_dim, loc_dim, backbone):
        super(TransformerHead, self).__init__()
        self.config = config.MODEL.MMSS_HEAD.TRANSFORMER
        self.v_dim = v_dim
        self.l_dim = l_dim
        self.loc_dim = loc_dim
        self.backbone = backbone

        self.mvm_loss = self.config.MVM_LOSS
        self.mmm_loss = self.config.MMM_LOSS
        self.num_negative = self.config.MVM_LOSS_NUM_NEGATIVE

        self.bert_config = BertConfig(**self.config.BERT_CONFIG)
        self.v2l_projection = nn.Linear(self.v_dim, self.l_dim)
        self.visual_emb = VisualEmbedding(self.bert_config, self.l_dim, self.loc_dim)
        self.encoder = BertEncoder(self.bert_config)
        self.pooler = BertPooler(self.bert_config)
        self.heads = MMPreTrainingHeads(self.bert_config, self.v_dim)

        self.encoder.apply(self._init_weights)
        self.pooler.apply(self._init_weights)
        self.heads.apply(self._init_weights)

        self._tie_weights()

        self.loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
        if self.mvm_loss == 'reconstruction_error':
            self.vis_criterion = nn.MSELoss(reduction="none")
        elif self.mvm_loss == 'contrastive_cross_entropy':
            self.vis_criterion = nn.CrossEntropyLoss()
        elif self.mvm_loss == '':
            self.vis_criterion = None
            for p in self.heads.imagePredictions.parameters():
                p.requires_grad = False
        else:
            raise NotImplementedError

        if self.mmm_loss == '':
            for p in self.pooler.parameters():
                p.requires_grad = False
            for p in self.heads.bi_seq_relationship.parameters():
                p.requires_grad = False
Example #22
    def __init__(self, config):
        super().__init__(config)

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        #self.pooler = BertPooler(config)
        self.pooler = BertPooler_Sigmoid(config)
        #self.pooler = BertPooler_reLu(config)

        #self.apply(self.init_weights)
        self.init_weights()
Example #23
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        # Drop the pooler layer entirely to reduce compute and GPU memory usage
        # self.pooler = BertPooler(config) if add_pooling_layer else None

        self.init_weights()
Example #24
    def __init__(self,config):
        super().__init__(config)
        self.config = config
        self.jointEmbeddings = JointEmbeddings(config.hidden_size,0.5,'mosei')
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        #self.Linear_v = nn.Linear()

        self.init_weights()
Example #25
 def __init__(self):
     super(BertClassificationModel, self).__init__()
     model_class, tokenizer_class, pretrained_weights = (tfs.BertModel, tfs.BertTokenizer, 'bert-base-chinese')
     self.tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
     # Embedding layer BertEmbeddings().
     self.embeddings = BertEmbeddings(config)
     # Multi-layer (12-layer) multi-head self-attention encoder BertEncoder.
     self.encoder = BertEncoder(config)
     self.bert = model_class.from_pretrained(pretrained_weights)
     self.dense = nn.Linear(768, 2)  # BERT's default hidden size is 768; 2 output units for binary classification
     self.dropout = nn.Dropout(p=0.5)  # dropout for regularization during training
Example #26
    def __init__(self, config):
        super().__init__(config)
        self.embeddings = ElectraEmbeddings(config)

        if config.embedding_size != config.hidden_size:
            self.embeddings_project = nn.Linear(config.embedding_size,
                                                config.hidden_size)
        self.encoder = BertEncoder(config)
        self.dense = nn.Linear(256, 256)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(256, 2)
        self.init_weights()
Example #27
class HuggingFaceEncoder(torch.nn.Module):
    def __init__(self, layer_num, head_num, head_size, weights=None):
        super().__init__()
        hidden_dim = head_num * head_size
        conf = BertConfig(hidden_size=hidden_dim,
                          intermediate_size=4 * hidden_dim,
                          num_attention_heads=head_num,
                          num_hidden_layers=layer_num)
        self.encoder = BertEncoder(conf)
        w = {}
        for k, v in weights.weights.items():
            if k.startswith('bert.encoder') and not k.endswith('_amax'):
                w[k[13:]] = weights.weights[k]
        self.encoder.load_state_dict(w)
        self.head_mask = [None] * layer_num

    def forward(self, hidden_states, attention_mask):
        extended_attention_mask = (1.0 - attention_mask) * -10000.0
        output = self.encoder(hidden_states, extended_attention_mask,
                              self.head_mask)
        return output
Example #28
    def __init__(self, config: LukeConfig):
        super(LukeModelDoc, self).__init__()
        self.config = config

        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        if self.config.bert_model_name and "roberta" in self.config.bert_model_name:
            self.embeddings = RobertaEmbeddings(config)
            self.embeddings.token_type_embeddings.requires_grad = False
        else:
            self.embeddings = BertEmbeddings(config)
        self.entity_embeddings = EntityEmbeddings(config)
Example #29
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.inject = DotAttention(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
Example #30
 def __init__(self, config):
     super().__init__(config)
     self.config = config
     self.embeddings = BertEmbeddings(config)
     # transformer blocks * N
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.MAG = MAG(beta=1.0,
                    hidden_size=hidden_size,
                    dropout=0.5,
                    device=device)
     self.MAG.apply(_init_weights)
     self.init_weights()