def __init__(self, config):
    super(BertForIREmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

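# A minimal usage sketch (not from the source): the forward pass implied by this
# __init__ typically sums the word, position, and token-type embeddings, then applies
# LayerNorm and dropout. The method name and body below are hypothetical, assuming
# standard BERT embedding behavior.
def _example_forward(self, input_ids, token_type_ids=None):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)
    embeddings = (self.word_embeddings(input_ids)
                  + self.position_embeddings(position_ids)
                  + self.token_type_embeddings(token_type_ids))
    embeddings = self.LayerNorm(embeddings)
    return self.dropout(embeddings)
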
def __init__(self, config):
    super(BertPredictionHeadTransform, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    if isinstance(config.hidden_act, str):
        self.transform_act_fn = ACT2FN[config.hidden_act]
    else:
        self.transform_act_fn = config.hidden_act
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)

def __init__(self, config):
    super(BertPredictionHeadTransform, self).__init__()
    # Need to untie it when we separate the dimensions of hidden and emb
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    # `unicode` only exists on Python 2; the short-circuit keeps this check safe on Python 3.
    if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
        self.transform_act_fn = ACT2FN[config.hidden_act]
    else:
        self.transform_act_fn = config.hidden_act
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)
    self.LayerNorm = BertLayerNorm(config.embedding_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    # TODO: RoBERTa still has an issue here: max_position_embeddings must be 512 for some
    # checkpoints to load, but some models were not trained with length 512, so be careful.
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, in_hsz, out_hsz, layer_norm=True, dropout=0.1, relu=True):
    super(LinearLayer, self).__init__()
    self.relu = relu
    self.layer_norm = layer_norm
    if layer_norm:
        self.LayerNorm = BertLayerNorm(in_hsz, eps=1e-5)
    layers = [
        nn.Dropout(dropout),
        nn.Linear(in_hsz, out_hsz)
    ]
    self.net = nn.Sequential(*layers)

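# A hypothetical forward for this LinearLayer (a sketch, not taken from the source):
# optional LayerNorm on the input, then dropout + linear via self.net, then optional ReLU.
def _example_forward(self, x):
    if self.layer_norm:
        x = self.LayerNorm(x)
    x = self.net(x)
    if self.relu:
        x = torch.relu(x)
    return x
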
def __init__(self, config):
    super(DepBertPredictionHeadTransform, self).__init__()
    self.child_transform = nn.Linear(config.hidden_size, int(config.hidden_size / 3))
    self.head_transform = nn.Linear(config.hidden_size, int(config.hidden_size / 3))
    if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
        self.transform_act_fn = ACT2FN[config.hidden_act]
    else:
        self.transform_act_fn = config.hidden_act
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    # Fall back gracefully when the config does not define use_relative_position.
    self.use_relative_position = getattr(config, 'use_relative_position', False)
    if not self.use_relative_position:
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.doc_embeddings_len = config.hidden_size
    self.word_embeddings_len = config.hidden_size
    self.position_embeddings_len = config.hidden_size
    self.word_embeddings = nn.Embedding(config.vocab_size, self.word_embeddings_len)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, self.position_embeddings_len)
    self.doc_embeddings = nn.Embedding(config.type_vocab_size, self.doc_embeddings_len)
    # self.interact = nn.Parameter(torch.FloatTensor(self.doc_embeddings_len, self.word_embeddings_len).unsqueeze(0))
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.hidden_size = config.hidden_size

def __init__(self, config, roberta_model_embedding_weights):
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    self.decoder = nn.Linear(roberta_model_embedding_weights.size(1),
                             roberta_model_embedding_weights.size(0),
                             bias=False)
    self.decoder.weight = roberta_model_embedding_weights
    self.bias = nn.Parameter(torch.zeros(roberta_model_embedding_weights.size(0)))
    # self.bias = nn.Parameter(torch.zeros(config.vocab_size))
    # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
    self.decoder.bias = self.bias

def __init__(self, config):
    super(BertOutput, self).__init__()
    self.dense = QuantizeLinear(
        config.intermediate_size,
        config.hidden_size,
        clip_val=config.clip_init_val,
        weight_bits=config.weight_bits,
        input_bits=config.input_bits,
        weight_layerwise=config.weight_layerwise,
        input_layerwise=config.input_layerwise,
        weight_quant_method=config.weight_quant_method,
        input_quant_method=config.input_quant_method,
        learnable=config.learnable_scaling,
        symmetric=config.sym_quant_ffn_attn)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config, bert_model_embedding_weights):
    super(BertLMPredictionHead, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    if isinstance(config.hidden_act, str):
        self.transform_act_fn = ACT2FN[config.hidden_act]
    else:
        self.transform_act_fn = config.hidden_act
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    self.decoder = nn.Linear(bert_model_embedding_weights.size(1),
                             bert_model_embedding_weights.size(0),
                             bias=False)
    self.decoder.weight = bert_model_embedding_weights
    self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0)))

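# A hypothetical sketch (not from the source) of how such a tied-weight LM head is
# usually applied: transform and normalize the hidden states, then project with the
# shared embedding matrix plus the output-only bias.
def _example_forward(self, hidden_states):
    hidden_states = self.dense(hidden_states)
    hidden_states = self.transform_act_fn(hidden_states)
    hidden_states = self.LayerNorm(hidden_states)
    return self.decoder(hidden_states) + self.bias
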
def __init__(self, config):
    super().__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
    self.max_relative_pos_len = config.max_relative_pos_len
    self.pos_emb_type = config.pos_emb_type
    self.diff_head_pos = config.diff_head_pos
    if self.pos_emb_type == 'absolute':
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    else:
        self.position_embeddings = None
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    self.max_position_id = config.max_position_embeddings
    self.bert_word_dropout = config.bert_word_dropout
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.bert_emb_dropout)
    self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    self.max_position_id = config.max_position_embeddings
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # This is the third dropout: the three embeddings are summed, LayerNorm is applied,
    # and dropout runs once after the LayerNorm.
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config,
             bert_word_dropout=None,
             bert_emb_dropout=None,
             bert_atten_dropout=None,
             bert_hidden_dropout=None,
             bert_hidden_size=None,
             is_decoder=False,
             before_plm_output_ln=False,
             gradient_checkpointing=False,
             **kwargs):
    super().__init__(config)
    self.config = config
    if bert_word_dropout is not None:
        self.config.bert_word_dropout = bert_word_dropout
    if bert_emb_dropout is not None:
        self.config.bert_emb_dropout = bert_emb_dropout
    if bert_atten_dropout is not None:
        self.config.bert_atten_dropout = bert_atten_dropout
    if bert_hidden_dropout is not None:
        self.config.bert_hidden_dropout = bert_hidden_dropout
    if bert_hidden_size is not None:
        self.config.bert_hidden_size = bert_hidden_size
    self.config.max_relative_pos_len = kwargs.pop('max_pos_len', 0)
    self.config.diff_head_pos = kwargs.pop('diff_head_pos', False)
    self.config.pos_emb_type = kwargs.pop('pos_emb_type', "absolute")
    self.config.is_decoder = is_decoder
    self.config.before_plm_output_ln = before_plm_output_ln
    self.config.gradient_checkpointing = gradient_checkpointing
    self.embeddings = BertEmbeddings(self.config)
    self.encoder = BertEncoder(self.config)
    if self.config.before_plm_output_ln:
        self.before_plm_output_ln = BertLayerNorm(self.config.hidden_size, eps=self.config.layer_norm_eps)
    else:
        self.before_plm_output_ln = None
    self.init_weights()

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.POS_NULL)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    if config.graph_input:
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size + 1, config.hidden_size)
    else:
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    ### composition model
    self.fcompmodel = config.fcompmodel
    self.graph_input = config.graph_input
    if config.fcompmodel:
        self.compose = FFCompose(config.hidden_size, config.label_emb)
    ### label embedding
    if config.fcompmodel or config.graph_input:
        self.label_emb = config.label_embedding
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config, pos_tag_embedding=False, senti_embedding=False, polarity_embedding=False):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    if senti_embedding:
        self.senti_embeddings = nn.Embedding(3, config.hidden_size, padding_idx=2)
    else:
        self.register_parameter('senti_embeddings', None)
    if pos_tag_embedding:
        self.pos_tag_embeddings = nn.Embedding(5, config.hidden_size, padding_idx=4)
    else:
        self.register_parameter('pos_tag_embeddings', None)
    if polarity_embedding:
        self.polarity_embeddings = nn.Embedding(6, config.hidden_size, padding_idx=5)
    else:
        self.register_parameter('polarity_embeddings', None)
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config):
    super(BertEmbeddings, self).__init__()
    self.word_embeddings_1 = QuantizeEmbedding(
        config.vocab_size,
        config.hidden_size,
        padding_idx=0,
        clip_val=config.clip_init_val,
        weight_bits=config.weight_bits,
        input_bits=config.input_bits,
        weight_quant_method=config.weight_quant_method,
        input_quant_method=config.input_quant_method,
        embed_layerwise=config.embed_layerwise,
        learnable=config.learnable_scaling,
        symmetric=config.sym_quant_qkvo)
    self.word_embeddings_2 = QuantizeEmbedding(
        config.vocab_size,
        config.hidden_size,
        padding_idx=0,
        clip_val=config.clip_init_val,
        weight_bits=config.weight_bits,
        input_bits=config.input_bits,
        weight_quant_method=config.weight_quant_method,
        input_quant_method=config.input_quant_method,
        embed_layerwise=config.embed_layerwise,
        learnable=config.learnable_scaling,
        symmetric=config.sym_quant_qkvo)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config):
    super(BertOutput, self).__init__()
    self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

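# A hypothetical sketch (not from the source) of the usual BertOutput forward:
# project down from the intermediate size, apply dropout, then LayerNorm over a
# residual connection with the block input.
def _example_forward(self, hidden_states, input_tensor):
    hidden_states = self.dense(hidden_states)
    hidden_states = self.dropout(hidden_states)
    return self.LayerNorm(hidden_states + input_tensor)
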
def __init__(self, config):
    super(BertSelfOutput, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config):
    super(BertPredictionHeadTransform, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.transform_act_fn = ACT2FN[config.hidden_act] \
        if isinstance(config.hidden_act, str) else config.hidden_act
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)

def __init__(self, config):
    super(AlbertLayer, self).__init__()
    self.attention_1 = BertAttention(config)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-5)
    self.ffn_1 = BertFF(config)
    self.LayerNorm_1 = BertLayerNorm(config.hidden_size, eps=1e-5)

def __init__(self, in_hsz, out_hsz):
    super(MLPLayer, self).__init__()
    self.linear_1 = nn.Linear(in_hsz, in_hsz * 2)
    self.LayerNorm = BertLayerNorm(in_hsz * 2, eps=1e-5)
    self.linear_2 = nn.Linear(in_hsz * 2, out_hsz)
    self.act = gelu

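# A hypothetical forward for this MLPLayer (a sketch, not from the source, showing one
# plausible ordering of the layers): expand, apply the GELU activation, normalize, then
# project to the output size. The actual ordering in the original code may differ.
def _example_forward(self, x):
    x = self.act(self.linear_1(x))
    x = self.LayerNorm(x)
    return self.linear_2(x)
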
def __init__(self, config):
    super(BertSelfOutput, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)  # originally this was hidden * hidden
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

def __init__(self, config, num_answers):
    super().__init__()
    hid_dim = config.hidden_size
    self.logit_fc = nn.Sequential(
        nn.Linear(hid_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, num_answers)
    )

def __init__(self, config):
    super().__init__()
    self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.bert_hidden_dropout)