def get_bert(path_bert):
    """Load the pre-trained uncased BERT-base (L-12, H-768, A-12) model.

    Args:
        path_bert: Directory containing the BERT config, vocab, and checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # os.path.join tolerates a missing trailing separator on path_bert,
    # unlike the previous raw string concatenation (path_bert + 'file').
    bert_config_file = os.path.join(path_bert, 'bert_config_uncased_L-12_H-768_A-12.json')
    vocab_file = os.path.join(path_bert, 'vocab_uncased_L-12_H-768_A-12.txt')
    init_checkpoint = os.path.join(path_bert, 'pytorch_model_uncased_L-12_H-768_A-12.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)

    model_bert = BertModel(bert_config)
    # Load weights on CPU first; move to GPU below only when requested.
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained parameters.")
    if gpu:  # NOTE(review): `gpu` and `device` appear to be module-level globals — confirm
        model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PATH):
    """Load the pre-trained uncased BERT-base (L-12, H-768, A-12) model.

    Args:
        BERT_PATH: Directory containing the BERT config, vocab, and checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # Use os.path.join instead of manual "/" concatenation — platform-correct
    # and consistent with the other get_bert variants in this file.
    bert_config_file = os.path.join(BERT_PATH, "bert_config_uncased_L-12_H-768_A-12.json")
    vocab_file = os.path.join(BERT_PATH, "vocab_uncased_L-12_H-768_A-12.txt")
    init_checkpoint = os.path.join(BERT_PATH, "pytorch_model_uncased_L-12_H-768_A-12.bin")

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # Load weights on CPU, then move the whole model to the target device.
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained BERT parameters.")
    model_bert.to(device)  # NOTE(review): `device` appears to be a module-level global — confirm

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a pre-trained BERT model of the given type, plus tokenizer and config.

    Args:
        BERT_PT_PATH: Directory containing the BERT config, vocab, and checkpoint.
        bert_type: Suffix identifying the model variant (selects the file names).
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint and keep
            the randomly initialized weights.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # BUG FIX: the original ignored `no_pretraining` and always loaded the
    # checkpoint; honor the flag (matching the other variant in this file).
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH):
    """Build a BERT model, tokenizer, and config from files under BERT_PT_PATH.

    Args:
        BERT_PT_PATH: Directory containing bert_config.json, vocab.txt, and
            pytorch_model.bin.

    Returns:
        Tuple of (model, tokenizer, config).
    """
    config_path = os.path.join(BERT_PT_PATH, 'bert_config.json')
    vocab_path = os.path.join(BERT_PT_PATH, 'vocab.txt')
    checkpoint_path = os.path.join(BERT_PT_PATH, 'pytorch_model.bin')

    config = BertConfig.from_json_file(config_path)
    tok = tokenization.FullTokenizer(vocab_file=vocab_path)
    config.print_status()  # echoes the loaded hyperparameters

    model = BertModel(config)
    # Deserialize the checkpoint on CPU, then move the model to the target device.
    state = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state)
    print("Load pre-trained parameters.")
    model.to(device)

    return model, tok, config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a pre-trained BERT model of the given type, plus tokenizer and config.

    Args:
        BERT_PT_PATH: Directory containing the BERT config, vocab, and checkpoint.
        bert_type: Suffix identifying the model variant (selects the file names).
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint and keep
            the randomly initialized weights.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    bert_config_file = os.path.join(BERT_PT_PATH, f"bert_config_{bert_type}.json")
    vocab_file = os.path.join(BERT_PT_PATH, f"vocab_{bert_type}.txt")
    init_checkpoint = os.path.join(BERT_PT_PATH, f"pytorch_model_{bert_type}.bin")

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case
    )
    bert_config.print_status()  # echoes the loaded hyperparameters

    model_bert = BertModel(bert_config)
    # Idiom fix: replaced `if no_pretraining: pass / else: ...` with the
    # direct positive condition. Behavior is unchanged.
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location="cpu"))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Load a BERT model, tokenizer, and config from fixed file names.

    Args:
        BERT_PT_PATH: Directory containing bert_config.json, vocab.txt, and
            (optionally) pytorch_model.bin.
        bert_type: Model-variant identifier. NOTE(review): accepted but unused
            here — the file names are hard-coded; confirm this is intended.
        do_lower_case: Whether the tokenizer lower-cases input text.
        no_pretraining: If True, skip loading the pre-trained checkpoint.

    Returns:
        Tuple of (model_bert, tokenizer, bert_config).
    """
    # Idiom fix: dropped the pointless f-prefixes — these strings contain no
    # placeholders, so the runtime values are byte-identical.
    bert_config_file = os.path.join(BERT_PT_PATH, 'bert_config.json')   # BERT config file
    vocab_file = os.path.join(BERT_PT_PATH, 'vocab.txt')                # BERT vocabulary file
    init_checkpoint = os.path.join(BERT_PT_PATH, 'pytorch_model.bin')   # pre-trained weights (may be absent)

    # BertConfig lives in the bundled BERT modeling module; from_json_file
    # reads the hyperparameters out of the JSON config.
    bert_config = BertConfig.from_json_file(bert_config_file)

    # FullTokenizer (from the bundled tokenization module) holds the vocab.
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)

    # Prints the config parameters (informational output only).
    bert_config.print_status()

    # BertModel (bundled modeling module) is built from the same config and
    # exposes the usual model operations (layers, parameter loading, ...).
    model_bert = BertModel(bert_config)
    if not no_pretraining:
        # Load the .bin checkpoint, i.e. the pre-trained parameters.
        model_bert.load_state_dict(
            torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    # model, tokenizer (vocabulary), and BERT configuration
    return model_bert, tokenizer, bert_config