def get_bert(path_bert):
    """Load the pre-trained uncased BERT-base (L-12, H-768, A-12) model.

    Args:
        path_bert: Directory containing the BERT config, vocab, and checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # os.path.join tolerates a missing trailing separator on path_bert,
    # unlike the previous raw string concatenation (path_bert + 'file').
    bert_config_file = os.path.join(path_bert, 'bert_config_uncased_L-12_H-768_A-12.json')
    vocab_file = os.path.join(path_bert, 'vocab_uncased_L-12_H-768_A-12.txt')
    init_checkpoint = os.path.join(path_bert, 'pytorch_model_uncased_L-12_H-768_A-12.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)

    model_bert = BertModel(bert_config)
    # Load weights on CPU first; move to GPU below only when requested.
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained parameters.")
    if gpu:  # NOTE(review): `gpu` and `device` appear to be module-level globals — confirm
        model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PATH):
    """Load the pre-trained uncased BERT-base (L-12, H-768, A-12) model.

    Args:
        BERT_PATH: Directory containing the BERT config, vocab, and checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # Use os.path.join instead of manual "/" concatenation — platform-correct
    # and consistent with the other get_bert variants in this file.
    bert_config_file = os.path.join(BERT_PATH, "bert_config_uncased_L-12_H-768_A-12.json")
    vocab_file = os.path.join(BERT_PATH, "vocab_uncased_L-12_H-768_A-12.txt")
    init_checkpoint = os.path.join(BERT_PATH, "pytorch_model_uncased_L-12_H-768_A-12.bin")

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # Load weights on CPU, then move the whole model to the target device.
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained BERT parameters.")
    model_bert.to(device)  # NOTE(review): `device` appears to be a module-level global — confirm

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a pre-trained BERT model of the given type, plus tokenizer and config.

    Args:
        BERT_PT_PATH: Directory containing the BERT config, vocab, and checkpoint.
        bert_type: Suffix identifying the model variant (selects the file names).
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint and keep
            the randomly initialized weights.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # BUG FIX: the original ignored `no_pretraining` and always loaded the
    # checkpoint; honor the flag (matching the other variant in this file).
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH):
    """Build a BERT model, tokenizer, and config from files under BERT_PT_PATH.

    Args:
        BERT_PT_PATH: Directory containing bert_config.json, vocab.txt, and
            pytorch_model.bin.

    Returns:
        Tuple of (model, tokenizer, config).
    """
    config_path = os.path.join(BERT_PT_PATH, 'bert_config.json')
    vocab_path = os.path.join(BERT_PT_PATH, 'vocab.txt')
    checkpoint_path = os.path.join(BERT_PT_PATH, 'pytorch_model.bin')

    config = BertConfig.from_json_file(config_path)
    tok = tokenization.FullTokenizer(vocab_file=vocab_path)
    config.print_status()  # echoes the loaded hyperparameters

    model = BertModel(config)
    # Deserialize the checkpoint on CPU, then move the model to the target device.
    state = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state)
    print("Load pre-trained parameters.")
    model.to(device)

    return model, tok, config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a pre-trained BERT model of the given type, plus tokenizer and config.

    Args:
        BERT_PT_PATH: Directory containing the BERT config, vocab, and checkpoint.
        bert_type: Suffix identifying the model variant (selects the file names).
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint and keep
            the randomly initialized weights.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    bert_config_file = os.path.join(BERT_PT_PATH, f"bert_config_{bert_type}.json")
    vocab_file = os.path.join(BERT_PT_PATH, f"vocab_{bert_type}.txt")
    init_checkpoint = os.path.join(BERT_PT_PATH, f"pytorch_model_{bert_type}.bin")

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case
    )
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # Idiom fix: replaced `if no_pretraining: pass / else: ...` with the
    # direct positive condition. Behavior is unchanged.
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location="cpu"))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a BERT model, tokenizer, and config from fixed file names.

    Args:
        BERT_PT_PATH: Directory containing bert_config.json, vocab.txt, and
            (optionally) pytorch_model.bin.
        bert_type: Model-variant identifier. NOTE(review): accepted but unused
            here — the file names are hard-coded; confirm this is intended.
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # Idiom fix: dropped the pointless f-prefixes — these strings contain no
    # placeholders, so the runtime values are byte-identical.
    bert_config_file = os.path.join(BERT_PT_PATH, 'bert_config.json')   # BERT config file
    vocab_file = os.path.join(BERT_PT_PATH, 'vocab.txt')                # BERT vocabulary file
    init_checkpoint = os.path.join(BERT_PT_PATH, 'pytorch_model.bin')   # pre-trained weights (may be absent)

    # BertConfig lives in the bundled BERT modeling module; from_json_file
    # reads the hyperparameters out of the JSON config.
    bert_config = BertConfig.from_json_file(bert_config_file)

    # FullTokenizer (from the bundled tokenization module) holds the vocab.
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)

    # Prints the config parameters (informational output only).
    bert_config.print_status()

    # BertModel (bundled modeling module) is built from the same config and
    # exposes the usual model operations (layers, parameter loading, ...).
    model_bert = BertModel(bert_config)
    if not no_pretraining:
        # Load the .bin checkpoint, i.e. the pre-trained parameters.
        model_bert.load_state_dict(
            torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    # model, tokenizer (vocabulary), and BERT configuration
    return model_bert, tokenizer, bert_config