def __init__(
    self,
    vocab_size=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    layer_norm_eps=1e-12,
    pad_token_id=0,
    gradient_checkpointing=False,
    kbs=None,
    pretrained_model_name_or_path=None,
    **kwargs
):
    """Initialize the BERT configuration and attach the knowledge settings.

    Every parameter from ``vocab_size`` through ``gradient_checkpointing``,
    together with any extra ``**kwargs``, is forwarded unchanged to
    ``BertConfig.__init__``. The two remaining parameters are stored on
    the instance after the base initializer has run.

    Args:
        kbs: Knowledge configurations, stored as ``self.kbs``. When
            omitted (or ``None``) each instance gets its own fresh empty
            dict; a caller-supplied object is stored as-is, not copied.
            NOTE(review): presumably a mapping keyed by knowledge-base
            name -- confirm the schema against callers.
        pretrained_model_name_or_path: Stored unchanged as
            ``self.pretrained_model_name_or_path``.
        **kwargs: Additional keyword arguments passed through to
            ``BertConfig.__init__``.
    """
    # initialize bert config
    BertConfig.__init__(
        self,
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
        layer_norm_eps=layer_norm_eps,
        pad_token_id=pad_token_id,
        gradient_checkpointing=gradient_checkpointing,
        **kwargs
    )
    # save knowledge configurations
    # A ``{}`` default in the signature would be one dict shared by every
    # instance created without ``kbs``; build a new one per instance instead.
    self.kbs = {} if kbs is None else kbs
    self.pretrained_model_name_or_path = pretrained_model_name_or_path
def __init__(self, **kwargs):
    """Initialize the instance by running both parent initializers.

    ``HeadlessConfig.__init__`` is invoked first and ``BertConfig.__init__``
    second, each explicitly (not through ``super()``) and each receiving
    the same ``**kwargs``.

    Args:
        **kwargs: Keyword arguments handed unchanged to both parent
            initializers.
    """
    # Keep the call order: HeadlessConfig first, then BertConfig.
    for parent in (HeadlessConfig, BertConfig):
        parent.__init__(self, **kwargs)