# Assumes the usual module-level imports for this file: copy, torch,
# torch.nn as nn, and BertLayerNorm from the BERT modeling utilities.
def __init__(self, config):
    super(RobertaLMHead, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # Vocabulary projection with a separate learnable output-only bias.
    self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    self.bias = nn.Parameter(torch.zeros(config.vocab_size))
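# A minimal sketch of the matching forward pass, assuming the standard
# RoBERTa LM-head ordering (dense -> GELU -> layer norm -> vocab projection).
# The GELU activation is an assumption; it is not defined in this excerpt.
def forward(self, features):
    x = self.dense(features)
    x = torch.nn.functional.gelu(x)
    x = self.layer_norm(x)
    # Project back to vocabulary size with the learnable output-only bias.
    return self.decoder(x) + self.bias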
def __init__(self, config):
    super().__init__(config)
    self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=config.pad_token_id)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)
    self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.embedding_size)
    # self.LayerNorm is not snake-cased to stick with the TensorFlow model
    # variable name and be able to load any TensorFlow checkpoint file.
    self.LayerNorm = BertLayerNorm(config.embedding_size, eps=config.layer_norm_eps)
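# A minimal sketch of how these embeddings are typically combined, assuming
# the standard BERT-style forward; position-id handling (e.g. RoBERTa's
# padding-aware offsets) may differ in the real model.
def forward(self, input_ids, token_type_ids=None):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)
    # Sum of token, position, and segment embeddings, then normalization.
    embeddings = (self.word_embeddings(input_ids)
                  + self.position_embeddings(position_ids)
                  + self.token_type_embeddings(token_type_ids))
    return self.LayerNorm(embeddings)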
def __init__(self, config, bert_model_embedding_weights):
    super(RobertaTokenAug, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # Per-token binary classifier. Unlike the LM head, the output weights are
    # NOT tied to the input embeddings here (the tying line is commented out);
    # the embedding matrix is only used for its hidden dimension.
    self.decoder = nn.Linear(bert_model_embedding_weights.size(1), 2, bias=False)
    # self.decoder.weight = bert_model_embedding_weights
    self.bias = nn.Parameter(torch.zeros(2))
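# A minimal sketch of the corresponding forward pass, assuming the same
# transform-then-classify ordering as the LM head above (the activation
# choice is an assumption, not confirmed by this excerpt).
def forward(self, features):
    x = self.dense(features)
    x = torch.nn.functional.gelu(x)
    x = self.layer_norm(x)
    return self.decoder(x) + self.bias  # (batch, seq_len, 2) per-token logits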
def __init__(self, config, bert_model_embedding_weights, args):
    super(RobertaTokenAug_2, self).__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # One independent binary decoder per sequence position; deepcopy gives
    # each position its own weights.
    decoder = nn.Linear(bert_model_embedding_weights.size(1), 2, bias=False)
    self.op = nn.ModuleList(
        [copy.deepcopy(decoder) for _ in range(args.max_seq_length)])
    # Register the per-position biases in a ParameterList so they are trainable,
    # saved in the state dict, and moved by .to(device). The original stored raw
    # .cuda() copies of a single Parameter in a plain Python list, which PyTorch
    # does not track (the biases would stay frozen at zero).
    self.op_bias = nn.ParameterList(
        [nn.Parameter(torch.zeros(2)) for _ in range(args.max_seq_length)])
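# A minimal sketch of applying one decoder per position, assuming inputs of
# shape (batch, max_seq_length, hidden_size); hypothetical, not from the source.
def forward(self, features):
    x = self.dense(features)
    x = torch.nn.functional.gelu(x)
    x = self.layer_norm(x)
    # Each position i gets its own linear decoder and bias.
    logits = [self.op[i](x[:, i]) + self.op_bias[i] for i in range(len(self.op))]
    return torch.stack(logits, dim=1)  # (batch, max_seq_length, 2)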
def __init__(self, config):
    super().__init__()
    # Note: constructed with the default eps rather than config.layer_norm_eps,
    # unlike the other heads in this file.
    self.LayerNorm = BertLayerNorm(config.embedding_size)
    self.dense = nn.Linear(config.hidden_size, config.embedding_size)
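# A minimal sketch of the projection this head performs, assuming a
# dense-then-normalize ordering (hidden_size -> embedding_size); the actual
# ordering and any activation are not shown in this excerpt.
def forward(self, hidden_states):
    hidden_states = self.dense(hidden_states)
    return self.LayerNorm(hidden_states)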