def __init__(self):
    super().__init__()
    # BERT encoder, optionally frozen during training
    self.bert = BertModel.from_pretrained('bert_base/')
    if args.bert_freeze:
        for param in self.bert.parameters():
            param.requires_grad = False
    # BiLSTM over the concatenated BERT + character-CNN features
    self.lstm = BiLSTM(
        input_size=args.bert_hidden_size + args.cnn_output_size,
        hidden_size=args.rnn_hidden_size + args.cnn_output_size,
        num_layers=args.rnn_num_layers,
        num_dirs=args.rnn_num_dirs)
    self.lstm_dropout = nn.Dropout(p=args.rnn_dropout)
    # Character-level CNN feature extractor
    self.cnn = CharCNN(
        embedding_num=len(CHAR_VOCAB),
        embedding_dim=args.cnn_embedding_dim,
        filters=eval(args.cnn_filters),
        output_size=args.cnn_output_size)
    # CRF decoding layer; the +2 leaves room for the start/end transition tags
    self.crf = CRF(target_size=len(VOCAB) + 2, use_cuda=args.crf_use_cuda)
    # Projection from the encoder features to the tag space
    self.linear = nn.Linear(
        in_features=args.rnn_hidden_size + args.cnn_output_size,
        out_features=len(VOCAB) + 2)
    # Multi-head self-attention over the BiLSTM outputs
    self.attn = MultiHeadAttention(
        model_dim=args.rnn_hidden_size + args.cnn_output_size,
        num_heads=args.attn_num_heads,
        dropout=args.attn_dropout)
    self.feat_dropout = nn.Dropout(p=args.feat_dropout)
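# Hypothetical forward pass (not in the original) sketching how the modules above could
# fit together: concatenate BERT token features with CharCNN character features, run the
# BiLSTM and multi-head self-attention, project to the tag space, and decode with the CRF.
# All argument names and the call signatures of the custom BiLSTM, CharCNN,
# MultiHeadAttention and CRF modules are assumptions for illustration only; the BERT call
# assumes the pytorch-pretrained-BERT style interface.
def forward(self, input_ids, char_ids, mask, tags=None):
    bert_feats, _ = self.bert(input_ids, attention_mask=mask,
                              output_all_encoded_layers=False)   # (batch, seq, bert_hidden)
    char_feats = self.cnn(char_ids)                              # (batch, seq, cnn_output_size)
    feats = self.feat_dropout(torch.cat([bert_feats, char_feats], dim=-1))
    rnn_out = self.lstm_dropout(self.lstm(feats))
    attn_out = self.attn(rnn_out, rnn_out, rnn_out)
    emissions = self.linear(attn_out)                            # (batch, seq, len(VOCAB) + 2)
    if tags is not None:
        return self.crf.neg_log_likelihood_loss(emissions, mask, tags)  # training loss
    return self.crf(emissions, mask)                                    # decoded tag sequence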
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, my_pretrain_bert):
    # Previous file-based loading path, kept for reference:
    # bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    # vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    # bert_config = BertConfig.from_json_file(bert_config_file)
    # tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
    # bert_config.print_status()
    # model_bert = BertModel(bert_config)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=do_lower_case)
    # from_pretrained returns only the model; read the config off the loaded model
    model_bert = BertModel.from_pretrained('bert-base-uncased')
    bert_config = model_bert.config
    if my_pretrain_bert:
        # Restore custom pre-trained weights from BERT_PT_PATH
        init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Loaded pre-trained parameters.")
    model_bert.to(device)
    return model_bert, tokenizer, bert_config
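# Hypothetical call site for get_bert (not in the original); the path and bert_type values
# are placeholders, and `device` is assumed to be defined globally as inside get_bert.
model_bert, tokenizer, bert_config = get_bert(
    BERT_PT_PATH='bert_models/', bert_type='uncased_L-12_H-768_A-12',
    do_lower_case=True, my_pretrain_bert=False)
tokens = ['[CLS]'] + tokenizer.tokenize("which singer released an album in 2010?") + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)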
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
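# Minimal usage sketch (not in the original), assuming the pytorch-pretrained-BERT style
# BertModel/BertTokenizer interface in which the model returns (encoded_layers, pooled_output);
# under the newer transformers API the return value is a model-output object instead.
import torch
from pytorch_pretrained_bert import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
model = bertModel('bert-base-uncased')
model.eval()
tokens = ['[CLS]'] + tokenizer.tokenize("bert encodes a sentence into contextual vectors") + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
with torch.no_grad():
    encoded_layers, pooled_output = model(input_ids)
# encoded_layers is a list with one (1, len(tokens), 768) tensor per BERT-base layer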
def __init__(self, bert_model_dir, pre_trained_model):
    super(RoBertModel, self).__init__()
    # Initialize BERT from a local model directory, overriding its weights with the
    # fine-tuned checkpoint stored under the "state_dict" key
    self.bert = BertModel.from_pretrained(
        bert_model_dir,
        state_dict=torch.load(pre_trained_model)["state_dict"])
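# Hypothetical continuation (not in the original): a forward method returning the pooled
# [CLS] representation, assuming the pytorch-pretrained-BERT style BertModel that returns
# (sequence_output, pooled_output) when output_all_encoded_layers=False.
def forward(self, input_ids, token_type_ids=None, attention_mask=None):
    sequence_output, pooled_output = self.bert(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask,
        output_all_encoded_layers=False)
    return pooled_output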