def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device, write_log, summary_writer):
    self.job_config = job_config

    if not use_pretrain:
        model_config = self.job_config.get_model_config()
        bert_config = BertConfig(**model_config)
        bert_config.vocab_size = len(tokenizer.vocab)
        self.bert_encoder = BertModel(bert_config)
    # Use pretrained bert weights
    else:
        self.bert_encoder = BertModel.from_pretrained(
            self.job_config.get_model_file_type(), cache_dir=cache_dir)
        bert_config = self.bert_encoder.config

    self.network = MTLRouting(self.bert_encoder, write_log=write_log,
                              summary_writer=summary_writer)

    # config_data = self.config['data']

    # Pretrain Dataset
    self.network.register_batch(
        BatchType.PRETRAIN_BATCH, "pretrain_dataset",
        loss_calculation=BertPretrainingLoss(self.bert_encoder, bert_config))

    self.device = device
def __init__(
    self,
    bert_model,
    output_dim,
    add_transformer_layer=False,
    layer_pulled=-1,
    aggregation="first",
):
    super(BertWrapper, self).__init__()
    self.layer_pulled = layer_pulled
    self.aggregation = aggregation
    self.add_transformer_layer = add_transformer_layer
    # deduce bert output dim from the size of embeddings
    bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)
    if add_transformer_layer:
        config_for_one_layer = BertConfig(
            0,
            hidden_size=bert_output_dim,
            num_attention_heads=int(bert_output_dim / 64),
            intermediate_size=3072,
            hidden_act='gelu',
        )
        self.additional_transformer_layer = BertLayer(config_for_one_layer)
    self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
    self.bert_model = bert_model
def load_model2(is_train, device, output_model_file, output_config_file,
                output_vocab_file, max_seq_length, chooser):
    output_config_file = output_config_file + str(chooser) + ".bin"
    output_model_file = output_model_file + str(chooser) + ".bin"
    output_vocab_file = output_vocab_file + str(chooser) + ".bin"
    try:
        config = BertConfig.from_json_file(output_config_file)
        model = BertBoosting(config, 768)
        state_dict = torch.load(output_model_file)
        model.load_state_dict(state_dict)
        tokenizer = BertTokenizer(output_vocab_file,
                                  do_lower_case=False,
                                  max_len=max_seq_length)
        model.cuda()
    except Exception:
        print("could not load file, initializing randomly")
        model = BertBoosting.from_pretrained("bert-base-cased")
        tokenizer = BertTokenizer.from_pretrained('bert-base-cased',
                                                  do_lower_case=False,
                                                  max_len=max_seq_length)
    # note: in this convention, is_train == 0 selects training mode
    if is_train == 0:
        model.train()
    else:
        model.eval()
    return model, tokenizer
def __init__(self, copy_attn, vocab_size, pad_idx,
             init_context=False, token_type='A', opt=None):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'bert'
    self.pad_idx = pad_idx
    self.token_type = token_type
    self.init_context = init_context
    self.opt = opt

    # Decoder State
    self.state = {}
    self._copy = copy_attn

    self.config = BertConfig(vocab_size)
    bert = BertModel(self.config)
    self.embeddings = MyBertEmbeddings(bert.embeddings, token_type, opt)
    self.transformer_layers = nn.ModuleList([
        BERTDecoderLayer(bert_layer, init_context)
        for bert_layer in bert.encoder.layer
    ])
def eval(config, filename):
    logger = config.get_logger('test')

    # setup data_loader instances
    processor = config.initialize('processor', module_processor, logger, config)
    processor.get_eval(filename)
    test_data_loader = config.initialize('data_loader', module_data,
                                          processor.data_dir, mode="eval",
                                          debug=config.debug_mode)

    # build model architecture, then print to console
    if config.bert_config_path:
        bert_config = BertConfig(config.bert_config_path)
        model = config.initialize('arch', module_arch, config=bert_config,
                                  num_labels=processor.nums_label())
    else:
        model = config.initialize_bert_model('arch', module_arch,
                                             num_labels=processor.nums_label())
    logger.info(model)

    agent = Agent(model, config=config, test_data_loader=test_data_loader)
    return agent.test(detail=True), processor
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.bert_config = BertConfig.from_dict(opt)
    self.bert = BertModel(self.bert_config)
    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False
    mem_size = self.bert_config.hidden_size
    self.decoder_opt = opt['answer_opt']
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']
    self.bert_pooler = None

    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if decoder_opt == 1:
            out_proj = SANClassifier(mem_size, mem_size, lab, opt,
                                     prefix='answer', dropout=dropout)
        else:
            out_proj = nn.Linear(self.bert_config.hidden_size, lab)
        self.scoring_list.append(out_proj)

    self.opt = opt
    self._my_init()
    self.set_embed(opt)
def load_bert_adapter(task_type, bert_model_name, bert_load_mode, bert_load_args,
                      all_state, num_labels, bert_config_json_path):
    if bert_config_json_path is None:
        bert_config_json_path = os.path.join(
            get_bert_config_path(bert_model_name), "bert_config.json")

    if bert_load_mode in ["model_only_adapter"]:
        adapter_state = all_state
    elif bert_load_mode in ["state_adapter"]:
        adapter_state = all_state["model"]
    else:
        raise KeyError(bert_load_mode)

    # Format: "bert_model_path:{path}"
    # Very hackish
    bert_state = torch.load(bert_load_args.replace("bert_model_path:", ""))

    config = BertConfig.from_json_file(bert_config_json_path)
    if task_type == TaskType.CLASSIFICATION:
        model = BertForSequenceClassification(config, num_labels=num_labels)
    elif task_type == TaskType.REGRESSION:
        assert num_labels == 1
        model = BertForSequenceRegression(config)
    else:
        raise KeyError(task_type)

    load_from_adapter(
        model=model,
        bert_state=bert_state,
        adapter_state=adapter_state,
    )
    return model
def test_sliding_window_with_batch(self):
    tokenizer = WordTokenizer(word_splitter=BertBasicWordSplitter())

    sentence = "the quickest quick brown fox jumped over the lazy dog"
    tokens = tokenizer.tokenize(sentence)

    vocab = Vocabulary()
    vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
    token_indexer = PretrainedBertIndexer(str(vocab_path),
                                          truncate_long_sequences=False,
                                          max_pieces=8)

    config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
    config = BertConfig(str(config_path))
    bert_model = BertModel(config)
    token_embedder = BertEmbedder(bert_model, max_pieces=8)

    instance = Instance({"tokens": TextField(tokens, {"bert": token_indexer})})
    instance2 = Instance({"tokens": TextField(tokens + tokens + tokens,
                                              {"bert": token_indexer})})

    batch = Batch([instance, instance2])
    batch.index_instances(vocab)

    padding_lengths = batch.get_padding_lengths()
    tensor_dict = batch.as_tensor_dict(padding_lengths)
    tokens = tensor_dict["tokens"]
    bert_vectors = token_embedder(tokens["bert"], offsets=tokens["bert-offsets"])
    assert bert_vectors is not None
def __init__(self, config):
    self.config = config
    self.align_layer_idx = -1

    cout_word = f'{self.description}: building '
    sys.stdout.write(cout_word)
    sys.stdout.flush()

    bert_config = BertConfig(
        vocab_size_or_config_json_file=config.phn_size,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
    )

    self.bert_model = BertModel(
        bert_config,
        config.feat_dim,
        config.phn_size,
        (config.batch_size * config.repeat) // 2,
    )
    self.optimizer = BertAdam(
        params=self.bert_model.parameters(),
        lr=config.sup_lr,
        warmup=0.1,
        t_total=config.pretrain_step + config.finetune_step,
    )

    if torch.cuda.is_available():
        self.bert_model.cuda()

    sys.stdout.write('\b' * len(cout_word))
    cout_word = f'{self.description}: finish '
    sys.stdout.write(cout_word + '\n')
    sys.stdout.flush()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    config_path = os.path.abspath(bert_config_file)
    tf_path = os.path.abspath(tf_checkpoint_path)
    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_path, config_path))

    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to
        # calculate m and v, which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
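# Hedged usage sketch for the conversion helper above (not part of the original
# source): the checkpoint, config, and output paths are hypothetical placeholders
# for a downloaded Google BERT checkpoint directory.
if __name__ == "__main__":
    convert_tf_checkpoint_to_pytorch(
        tf_checkpoint_path="uncased_L-12_H-768_A-12/bert_model.ckpt",
        bert_config_file="uncased_L-12_H-768_A-12/bert_config.json",
        pytorch_dump_path="uncased_L-12_H-768_A-12/pytorch_model.bin",
    )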
def get_config(config_path_or_type, logger):
    if config_path_or_type in PRETRAINED_MODEL_ARCHIVE_MAP:
        archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[config_path_or_type]
    else:
        archive_file = config_path_or_type

    # redirect to the cache, if necessary
    try:
        resolved_archive_file = cached_path(archive_file)
    except EnvironmentError:
        logger.error(
            "Model name '{}' was not found in model name list ({}). "
            "We assumed '{}' was a path or url but couldn't find any file "
            "associated to this path or url.".format(
                config_path_or_type,
                ', '.join(PRETRAINED_MODEL_ARCHIVE_MAP.keys()),
                archive_file))
        return None

    if resolved_archive_file == archive_file:
        logger.info("loading archive file {}".format(archive_file))
    else:
        logger.info("loading archive file {} from cache at {}".format(
            archive_file, resolved_archive_file))

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info("extracting archive file {} to temp dir {}".format(
            resolved_archive_file, tempdir))
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)
        serialization_dir = tempdir

    # Load config
    config_file = os.path.join(serialization_dir, CONFIG_NAME)
    config = BertConfig.from_json_file(config_file)
    return config
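# Illustrative call of get_config (an assumption, not taken from the source):
# a registered shortcut name resolves through PRETRAINED_MODEL_ARCHIVE_MAP,
# while an explicit path or URL is used directly; any standard logging.Logger
# instance can be passed.
import logging

logger = logging.getLogger(__name__)
bert_base_config = get_config("bert-base-uncased", logger)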
def create_model(args, device, config_file='', weights_file=''):
    ''' create squad model from args '''
    ModelClass = None
    if args.squad_model == 'bert_base':
        print('creating bert base model')
        ModelClass = SquadModel
    elif args.squad_model == 'bert_linear':
        print('creating bert linear model')
        ModelClass = SquadLinearModel
    elif args.squad_model == 'bert_deep':
        print('creating bert deep model')
        ModelClass = SquadDeepModel
    elif args.squad_model == 'bert_qanet':
        print('creating bert qanet model')
        ModelClass = SquadModelQANet

    if config_file == '' and weights_file == '':
        print('creating an untrained model')
        return ModelClass.from_pretrained(
            args.bert_model,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                   'distributed_{}'.format(args.local_rank)))
    else:
        print('loading a trained model')
        config = BertConfig(config_file)
        model = ModelClass(config)
        model.load_state_dict(torch.load(weights_file, map_location=device))
        return model
def create_model(args, dataset, train=True):
    print("[*] Create model.")
    global model
    if train:
        model = BertForSequenceClassification.from_pretrained(BERT, num_labels=5)
    else:
        if BERT == 'bert-large-uncased':
            config = BertConfig.from_json_file("uncase_model")
        else:
            config = BertConfig.from_json_file("case_model")
        model = BertForSequenceClassification(config, num_labels=5)
    # for i in model.bert.named_parameters():
    #     i[1].requires_grad = False
    model = model.to(device)
    # print(model)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]

    if train:
        num_train_optimization_steps = int(
            len(dataset["train"]) / args.batch_size /
            args.gradient_accumulation_steps) * args.epochs
        global optimizer
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.lr_rate,
                             warmup=0.1,
                             t_total=num_train_optimization_steps)
        # optimizer = optim.Adam(model.parameters(),
        #                        lr=args.lr_rate)  # , betas=(0.9, 0.999), weight_decay=1e-3)
    return
def train(model, args, n_gpu, optimizer, num_train_optimization_steps,
          num_labels, train_dataloader, device):
    ''' train model '''
    model.train()
    global global_step
    global nb_tr_steps
    global tr_loss
    for _ in trange(int(args.num_train_epochs), desc="Epoch"):
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            # print("input_ids_shape", input_ids.shape)
            # print("label_ids_shape", label_ids.shape)
            loss = model(input_ids, segment_ids, input_mask, label_ids)
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    # modify learning rate with the special warm up BERT uses;
                    # if args.fp16 is False, BertAdam is used and handles this automatically
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / num_train_optimization_steps,
                        args.warmup_proportion)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

    ## save model
    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model itself
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
    torch.save(model_to_save.state_dict(), output_model_file)
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
    with open(output_config_file, 'w') as f:
        f.write(model_to_save.config.to_json_string())

    # Load a trained model and config that you have fine-tuned
    config = BertConfig(output_config_file)
    model = BertForDocMultiClassification(config, num_labels=num_labels)
    model.load_state_dict(torch.load(output_model_file))
    return model
def test_squad_with_unwordpieceable_passage(self):
    tokenizer = SpacyTokenizer()

    token_indexer = PretrainedBertIndexer("bert-base-uncased")

    passage1 = (
        "There were four major HDTV systems tested by SMPTE in the late 1970s, "
        "and in 1979 an SMPTE study group released A Study of High Definition Television Systems:"
    )
    question1 = "Who released A Study of High Definition Television Systems?"

    passage2 = (
        "Broca, being what today would be called a neurosurgeon, "
        "had taken an interest in the pathology of speech. He wanted "
        "to localize the difference between man and the other animals, "
        "which appeared to reside in speech. He discovered the speech "
        "center of the human brain, today called Broca's area after him. "
        "His interest was mainly in Biological anthropology, but a German "
        "philosopher specializing in psychology, Theodor Waitz, took up the "
        "theme of general and social anthropology in his six-volume work, "
        "entitled Die Anthropologie der Naturvölker, 1859–1864. The title was "
        """soon translated as "The Anthropology of Primitive Peoples". """
        "The last two volumes were published posthumously.")
    question2 = "What did Broca discover in the human brain?"

    from allennlp.data.dataset_readers.reading_comprehension.util import (
        make_reading_comprehension_instance,
    )

    instance1 = make_reading_comprehension_instance(
        tokenizer.tokenize(question1),
        tokenizer.tokenize(passage1),
        {"bert": token_indexer},
        passage1,
    )
    instance2 = make_reading_comprehension_instance(
        tokenizer.tokenize(question2),
        tokenizer.tokenize(passage2),
        {"bert": token_indexer},
        passage2,
    )

    vocab = Vocabulary()

    batch = Batch([instance1, instance2])
    batch.index_instances(vocab)

    padding_lengths = batch.get_padding_lengths()
    tensor_dict = batch.as_tensor_dict(padding_lengths)
    qtokens = tensor_dict["question"]
    ptokens = tensor_dict["passage"]

    config = BertConfig(len(token_indexer.vocab))
    model = BertModel(config)
    embedder = BertEmbedder(model)

    _ = embedder(ptokens["bert"], offsets=ptokens["bert-offsets"])
    _ = embedder(qtokens["bert"], offsets=qtokens["bert-offsets"])
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.encoder_type = opt['encoder_type']
    if opt['encoder_type'] == EncoderModelType.ROBERTA:
        from fairseq.models.roberta import RobertaModel
        self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
        hidden_size = self.bert.args.encoder_embed_dim
        self.pooler = LinearPooler(hidden_size)
    else:
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        hidden_size = self.bert_config.hidden_size

    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.decoder_opt = opt['answer_opt']
    self.task_types = opt["task_types"]
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']

    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        task_type = self.task_types[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if task_type == TaskType.Span:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SeqenceLabeling:
            out_proj = nn.Linear(hidden_size, lab)
        elif task_type == TaskType.MaskLM:
            if opt['encoder_type'] == EncoderModelType.ROBERTA:
                # TODO: xiaodl
                out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
            else:
                out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
        else:
            if decoder_opt == 1:
                out_proj = SANClassifier(hidden_size, hidden_size, lab, opt,
                                         prefix='answer', dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
        self.scoring_list.append(out_proj)

    self.opt = opt
    self._my_init()
def __init__(self, vocab_size, pad_idx):
    super(BERTEncoder, self).__init__()
    self.config = BertConfig(vocab_size)
    bert = BertModel(self.config)
    self.embeddings = MyEncoderBertEmbeddings(bert.embeddings)
    self.encoder = bert.encoder
    self.pad_idx = pad_idx
def __init__(self, num_choices, bert_config_file):
    self.num_choices = num_choices
    bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, bert_config)
    self.bert = BertModel(bert_config)
    self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
    self.classifier = nn.Linear(bert_config.hidden_size, 1)
    self.activation = nn.Sigmoid()
    self.apply(self.init_bert_weights)
def __init__(self, vocab_size, original_hidden_size, num_layers, tau=1):
    super().__init__()
    self.bert_layer = BertLayer(BertConfig(
        vocab_size_or_config_json_file=vocab_size,
        hidden_size=original_hidden_size * num_layers,
    ))
    self.linear_layer = nn.Linear(original_hidden_size * num_layers, 1)
    self.log_sigmoid = nn.LogSigmoid()
    self.tau = tau
def setUp(self):
    super().setUp()
    self.monkeypatch = MonkeyPatch()
    # monkeypatch the PretrainedBertModel to return the tiny test fixture model
    config_path = self.FIXTURES_ROOT / "bert" / "config.json"
    config = BertConfig(str(config_path))
    self.monkeypatch.setattr(BertModel, "from_pretrained",
                             lambda _: BertModel(config))
def load_bert(model_path="bert/model/pytorch_model.bin",
              config_file="bert/config_parameters/config.json"):
    print("Loading BERT-model...")
    config = BertConfig(config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device("cpu")))
    print("Model loaded.\n\n")
    return model
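# Minimal usage sketch for load_bert (not in the original source): assumes the
# default model and config paths exist on disk and that the surrounding module
# already imports torch, BertConfig, and BertForQuestionAnswering.
qa_model = load_bert()
qa_model.eval()  # the loader itself does not switch the model to eval mode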
def main(args):
    logging = config.get_logging(args.log_name)
    logging.info(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    tokenizer = BertTokenizer.build_tokenizer(args)
    # train_data_iter = MSmarco_iterator(args, tokenizer, batch_size=args.train_batch_size,
    #                                    world_size=n_gpu,
    #                                    accumulation_steps=args.gradient_accumulation_steps,
    #                                    name="msmarco_train.pk")
    dev_data_iter = MSmarco_iterator(args, tokenizer,
                                     batch_size=args.valid_batch_size,
                                     world_size=n_gpu,
                                     name="msmarco_dev.pk")
    logging.info("| dev batch data size {}".format(len(dev_data_iter)))
    # num_train_steps = (96032//2//2)+(data_size-96032)//2

    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    pre_dir = args.pre_dir
    config_file = os.path.join(pre_dir, CONFIG_NAME)
    bert_config = BertConfig.from_json_file(config_file)
    model = MSmarco(bert_config)
    logging.info("| load model from {}".format(args.path))
    state_dict = torch.load(args.path, map_location=torch.device('cpu'))
    metadata = getattr(state_dict, '_metadata', None)
    # state_dict = state_dict.copy()
    # if metadata is not None:
    #     state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True,
            missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')

    load(model, prefix='module.')
    if len(missing_keys) > 0:
        # logger.info("Weights of {} not initialized from pretrained model: {}".format(
        #     model.__class__.__name__, missing_keys))
        print("| Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        # logger.info("Weights from pretrained model not used in {}: {}".format(
        #     model.__class__.__name__, unexpected_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    # model._load_from_state_dict(state_dict, prefix="module.")
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # save_checkpoint(args, model, epochs)
    validation(args, model, dev_data_iter, n_gpu, 0, 0, logging)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
def from_fine_tuned(cls, model_path, map_location='default_map_location',
                    *inputs, **kwargs):
    config = BertConfig(os.path.join(model_path, CONFIG_NAME))
    model = cls(config, *inputs, **kwargs)
    saved_kwargs = MultiPredictionHead.load_kwargs(model_path)
    if map_location == 'default_map_location':
        map_location = 'cpu' if not torch.cuda.is_available() else None
    state_dict = torch.load(os.path.join(model_path, WEIGHTS_NAME),
                            map_location=map_location)
    model.prediction_head.update_state_dict('prediction_head.', state_dict,
                                            saved_kwargs)
    model.load_state_dict(state_dict, strict=False)
    return model
def setUp(self):
    super().setUp()

    vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
    self.token_indexer = PretrainedBertIndexer(str(vocab_path))

    config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
    config = BertConfig(str(config_path))
    self.bert_model = BertModel(config)
    self.token_embedder = BertEmbedder(self.bert_model)
def load_model(self, model_dir, model_config: str = "model_config.json"):
    model_config = os.path.join(model_dir, model_config)
    model_config = json.load(open(model_config))
    output_config_file = os.path.join(model_dir, CONFIG_NAME)
    output_model_file = os.path.join(model_dir, WEIGHTS_NAME)
    config = BertConfig(output_config_file)
    model = BertForSequenceClassification(config,
                                          num_labels=model_config["num_labels"])
    model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
    tokenizer = BertTokenizer.from_pretrained(
        model_config["bert_model"], do_lower_case=model_config["do_lower"])
    return model, tokenizer, model_config
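# Hypothetical call of load_model above (an illustration, not from the source):
# `predictor` stands in for whatever object owns this method, and "output/clf"
# for a directory holding model_config.json plus the saved CONFIG_NAME and
# WEIGHTS_NAME files.
model, tokenizer, model_config = predictor.load_model("output/clf")
model.eval()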
def __init__(self, bert_model_path, decoder_config, device):
    super().__init__()
    self.bert_encoder = BertModel.from_pretrained(bert_model_path)
    bert_config_file = os.path.join(bert_model_path, CONFIG_NAME)
    bert_config = BertConfig.from_json_file(bert_config_file)
    self.device = device
    self.bert_emb = BertEmbeddings(bert_config)
    self.decoder = BertDecoder(decoder_config, self.bert_emb, device)
    self.teacher_forcing = 0.5
def load_model(self, model_dir: str, model_config: str = "model_config.json"):
    model_config = os.path.join(model_dir, model_config)
    model_config = json.load(open(model_config))
    output_config_file = os.path.join(model_dir, CONFIG_NAME)
    output_model_file = os.path.join(model_dir, WEIGHTS_NAME)
    config = BertConfig(output_config_file)
    model = BertForTokenClassification(config,
                                       num_labels=model_config["num_labels"])
    model.load_state_dict(torch.load(output_model_file))
    tokenizer = FullTokenizer(model_file='cased_bert_base_pytorch/mn_cased.model',
                              vocab_file='cased_bert_base_pytorch/mn_cased.vocab',
                              do_lower_case=False)
    return model, tokenizer, model_config
def load_model_multilabel(
    model_path: Path, model_name: str, num_labels: int = 2
) -> BertForMultiLabelSequenceClassification:
    model_path = Path(model_path)
    config = BertConfig(str(model_path / f"{model_name}-config.json"))
    model = BertForMultiLabelSequenceClassification(config, num_labels=num_labels)
    model.load_state_dict(
        torch.load(str(model_path / f"{model_name}-model.pt"), map_location=device)
    )
    return model
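# Hedged example for load_model_multilabel (not from the source): the path,
# model name, and label count are placeholders, and `device` is assumed to be
# defined at module level, as it is inside the function body.
multilabel_model = load_model_multilabel(
    model_path="checkpoints", model_name="toxic", num_labels=6)
multilabel_model.eval()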
def __init__(self, config: Config, output_encoded_layers: bool, *args, **kwargs) -> None:
    super().__init__(config, output_encoded_layers=output_encoded_layers)
    # Load config
    config_file = os.path.join(config.bert_cpt_dir, "bert_config.json")
    bert_config = BertConfig.from_json_file(config_file)
    print("Bert model config {}".format(bert_config))
    # Instantiate model.
    model = BertModel(bert_config)
    weights_path = os.path.join(config.bert_cpt_dir, "pytorch_model.bin")
    # load pre-trained weights if weights_path exists
    if config.load_weights and PathManager.isfile(weights_path):
        state_dict = torch.load(weights_path)

        missing_keys: List[str] = []
        unexpected_keys: List[str] = []
        error_msgs: List[str] = []
        # copy state_dict so _load_from_state_dict can modify it
        metadata = getattr(state_dict, "_metadata", None)
        state_dict = state_dict.copy()
        if metadata is not None:
            state_dict._metadata = metadata

        def load(module, prefix=""):
            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
            module._load_from_state_dict(
                state_dict,
                prefix,
                local_metadata,
                True,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + ".")

        load(model, prefix="" if hasattr(model, "bert") else "bert.")
        if len(missing_keys) > 0:
            print("Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            print("Weights from pretrained model not used in {}: {}".format(
                model.__class__.__name__, unexpected_keys))

    self.bert = model
    self.projection = (torch.nn.Linear(model.config.hidden_size, config.projection_dim)
                       if config.projection_dim > 0 else None)
    log_class_usage(__class__)