def convert_tf_checkpoint_to_pytorch():
    # Gave an error originally. Solution found at:
    # https://github.com/tensorflow/models/issues/2676
    tf_path = 'weights/biobert_v1.1_pubmed/model.ckpt-1000000'
    init_vars = tf.train.list_variables(tf_path)
    excluded = ['BERTAdam', '_power', 'global_step']
    init_vars = list(filter(lambda x: all(e not in x[0] for e in excluded), init_vars))
    print(init_vars)

    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    config = BertConfig.from_json_file('weights/biobert_v1.1_pubmed/bert_config.json')
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format('weights/'))
    torch.save(model.state_dict(), 'weights/pytorch_weight')
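# Usage sketch (an assumption, not part of the original example): the converter above
# relies on module-level imports like these and on the BioBERT checkpoint files living
# under 'weights/biobert_v1.1_pubmed/'. Adjust the paths to your own layout.
import re

import numpy as np
import tensorflow as tf
import torch
from pytorch_pretrained_bert import BertConfig, BertForPreTraining

if __name__ == '__main__':
    convert_tf_checkpoint_to_pytorch()  # writes the converted weights to 'weights/pytorch_weight'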
def load_BFTC_from_TF_ckpt(bert_config, ckpt_path, num_labels):
    """Helper function for loading the model - workaround to prevent an error."""
    config = BertConfig.from_json_file(bert_config)
    model = BertForPreTraining(config)
    load_tf_weights_in_bert(model, ckpt_path)
    state_dict = model.state_dict()
    model = BertForTokenClassification(config, num_labels=num_labels)

    # Load from a PyTorch state_dict
    old_keys = []
    new_keys = []
    for key in state_dict.keys():
        new_key = None
        if 'gamma' in key:
            new_key = key.replace('gamma', 'weight')
        if 'beta' in key:
            new_key = key.replace('beta', 'bias')
        if new_key:
            old_keys.append(key)
            new_keys.append(new_key)
    for old_key, new_key in zip(old_keys, new_keys):
        state_dict[new_key] = state_dict.pop(old_key)

    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    # copy state_dict so _load_from_state_dict can modify it
    metadata = getattr(state_dict, '_metadata', None)
    state_dict = state_dict.copy()
    if metadata is not None:
        state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True,
            missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')

    start_prefix = ''
    if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()):
        start_prefix = 'bert.'
    load(model, prefix=start_prefix)

    if len(missing_keys) > 0:
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    if len(error_msgs) > 0:
        raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
            model.__class__.__name__, "\n\t".join(error_msgs)))
    return model
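# Usage sketch (assumption): build a token-classification model for an 8-label NER task
# straight from a TF checkpoint. The paths mirror the BioBERT layout used above, and
# load_tf_weights_in_bert / BertForTokenClassification are assumed to be imported from
# pytorch_pretrained_bert.modeling.
ner_model = load_BFTC_from_TF_ckpt(
    bert_config='weights/biobert_v1.1_pubmed/bert_config.json',
    ckpt_path='weights/biobert_v1.1_pubmed/model.ckpt-1000000',
    num_labels=8)
ner_model.eval()  # the classification head is still randomly initialized and needs fine-tuning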
def _convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Adapted from https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py#L30
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    _load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    torch.save(model.state_dict(), pytorch_dump_path)
def create_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask,
                                sequence_labels, token_labels, choice_labels):
    model = BertForPreTraining(config=config)
    model.eval()
    loss = model(input_ids, token_type_ids, input_mask, token_labels, sequence_labels)
    prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask)
    outputs = {
        "loss": loss,
        "prediction_scores": prediction_scores,
        "seq_relationship_score": seq_relationship_score,
    }
    return outputs
def load_biobert_model(biobert_pth, device):
    """Read the saved state dict for a BioBERT model on disk.

    Args:
        biobert_pth: str, folder path where the model state dictionary and config file are saved
    """
    bert_config_file = os.path.join(biobert_pth, 'bert_config.json')
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)
    model.load_state_dict(torch.load(os.path.join(biobert_pth, 'biobert_statedict.pkl'),
                                     map_location=device))
    return model
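# Usage sketch (assumption): load the saved BioBERT state dict on CPU and run a dummy
# forward pass. 'weights/' must already contain bert_config.json and biobert_statedict.pkl.
import torch

cpu = torch.device('cpu')
biobert = load_biobert_model('weights/', cpu)
biobert.eval()
with torch.no_grad():
    dummy_ids = torch.zeros([1, 16], dtype=torch.long)
    # With no labels, BertForPreTraining returns (masked-LM scores, next-sentence scores)
    prediction_scores, seq_relationship_score = biobert(dummy_ids)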
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
def __init__(self):
    # Load pre-trained model tokenizer (vocabulary)
    self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)
    # Load pre-trained model (weights)
    self.model = BertForPreTraining.from_pretrained('bert-base-uncased')
    self.model.eval()
def __init__(self, vis_feat_dim=2208, spatial_size=7, hidden_dim=768,
             cmb_feat_dim=16000, kernel_size=3):
    """Initialize MCBertForPretrainingModel."""
    super(MCBertForPretrainingModel, self).__init__()
    self.vis_feat_dim = vis_feat_dim
    self.spatial_size = spatial_size
    self.hidden_dim = hidden_dim
    self.cmb_feat_dim = cmb_feat_dim
    self.kernel_size = kernel_size

    self.mcbert_model = MCBertModel(vis_feat_dim=vis_feat_dim,
                                    spatial_size=spatial_size,
                                    hidden_dim=hidden_dim,
                                    cmb_feat_dim=cmb_feat_dim,
                                    kernel_size=kernel_size)

    version = "bert-base-cased"
    bert_model = BertForPreTraining.from_pretrained(version)
    self.cls = bert_model.cls
    self.vocab_size = bert_model.config.vocab_size
def get_lm_ranker(bert_model, max_seq_length=100):
    tokenizer = BertTokenizer.from_pretrained(
        os.path.join(bert_model, "vocab.txt"), do_lower_case=True)
    transform = BertLmRankingTransform(tokenizer=tokenizer, max_len=max_seq_length)

    state_save_path = os.path.join(bert_model, 'model.state')
    if os.path.exists(state_save_path):
        state = torch.load(state_save_path, map_location="cpu")
        model = BertForPreTraining.from_pretrained(
            bert_model, state_dict=state['model_state'])
    else:
        previous_model_file = os.path.join(bert_model, "pytorch_model.bin")
        model_state_dict = torch.load(previous_model_file, map_location="cpu")
        model = BertForPreTraining.from_pretrained(bert_model,
                                                   state_dict=model_state_dict)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    lm_ranker = LmBasedRanker(model, tokenizer, transform, device)
    return lm_ranker
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    config_path = os.path.abspath(bert_config_file)
    tf_path = os.path.abspath(tf_checkpoint_path)
    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_path, config_path))

    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    excluded = ['BERTAdam', '_power', 'global_step']
    init_vars = list(filter(lambda x: all(e not in x[0] for e in excluded), init_vars))

    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
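# Sketch of a command-line entry point (an assumption modeled on the upstream
# convert_tf_checkpoint_to_pytorch.py script, not code from this example):
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--tf_checkpoint_path', required=True,
                        help='Path/prefix of the TensorFlow checkpoint, e.g. .../model.ckpt-1000000')
    parser.add_argument('--bert_config_file', required=True,
                        help='bert_config.json describing the pre-trained model architecture')
    parser.add_argument('--pytorch_dump_path', required=True,
                        help='Output path for the converted PyTorch state dict')
    cli_args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(cli_args.tf_checkpoint_path,
                                     cli_args.bert_config_file,
                                     cli_args.pytorch_dump_path)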
def build_model(cls, args, task): """Build a new model instance.""" # make sure that all args are properly defaulted (in case there are any new ones) base_architecture(args) if args.encoder_layers != args.decoder_layers: raise ValueError('--encoder-layers must match --decoder-layers') def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim): num_embeddings = len(dictionary) padding_idx = dictionary.pad() embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) embed_dict = utils.parse_embedding(embed_path) utils.print_embed_overlap(embed_dict, dictionary) return utils.load_embedding(embed_dict, dictionary, embed_tokens) if args.encoder_embed_path: pretrained_encoder_embed = load_pretrained_embedding_from_file( args.encoder_embed_path, task.source_dictionary, args.encoder_embed_dim) else: num_embeddings = len(task.source_dictionary) pretrained_encoder_embed = Embedding(num_embeddings, args.encoder_embed_dim, task.source_dictionary.pad()) if args.share_all_embeddings: # double check all parameters combinations are valid if task.source_dictionary != task.target_dictionary: raise ValueError( '--share-all-embeddings requires a joint dictionary') if args.decoder_embed_path and (args.decoder_embed_path != args.encoder_embed_path): raise ValueError( '--share-all-embed not compatible with --decoder-embed-path' ) if args.encoder_embed_dim != args.decoder_embed_dim: raise ValueError( '--share-all-embeddings requires --encoder-embed-dim to ' 'match --decoder-embed-dim') pretrained_decoder_embed = pretrained_encoder_embed args.share_decoder_input_output_embed = True else: # separate decoder input embeddings pretrained_decoder_embed = None if args.decoder_embed_path: pretrained_decoder_embed = load_pretrained_embedding_from_file( args.decoder_embed_path, task.target_dictionary, args.decoder_embed_dim) # one last double check of parameter combinations if args.share_decoder_input_output_embed and ( args.decoder_embed_dim != args.decoder_out_embed_dim): raise ValueError( '--share-decoder-input-output-embeddings requires ' '--decoder-embed-dim to match --decoder-out-embed-dim') if args.encoder_freeze_embed: pretrained_encoder_embed.weight.requires_grad = False if args.decoder_freeze_embed: pretrained_decoder_embed.weight.requires_grad = False bert_model = BertForPreTraining.from_pretrained(args.bert_base) bert_model.load_state_dict(torch.load(args.bert_finetune)) bert = bert_model.bert encoder = LSTMEncoder(dictionary=task.source_dictionary, embed_dim=args.encoder_embed_dim, hidden_size=args.encoder_hidden_size, num_layers=args.encoder_layers, dropout_in=args.encoder_dropout_in, dropout_out=args.encoder_dropout_out, bidirectional=args.encoder_bidirectional, pretrained_embed=pretrained_encoder_embed, layer=args.layer, bert=bert) decoder = LSTMDecoder( dictionary=task.target_dictionary, embed_dim=args.decoder_embed_dim, hidden_size=args.decoder_hidden_size, out_embed_dim=args.decoder_out_embed_dim, num_layers=args.decoder_layers, dropout_in=args.decoder_dropout_in, dropout_out=args.decoder_dropout_out, attention=options.eval_bool(args.decoder_attention), encoder_output_units=encoder.output_units, pretrained_embed=pretrained_decoder_embed, share_input_output_embed=args.share_decoder_input_output_embed, adaptive_softmax_cutoff=(options.eval_str_list( args.adaptive_softmax_cutoff, type=int) if args.criterion == 'adaptive_loss' else None), ) return cls(encoder, decoder)
import os

import matplotlib
import matplotlib.pyplot as plt
from pylab import rcParams

import torch
import torch.nn.functional as F
from pytorch_pretrained_bert import tokenization, BertTokenizer, BertModel, BertForMaskedLM, BertForPreTraining, BertConfig
from examples.extract_features import *

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

CONFIG_NAME = 'bert_config.json'
BERT_DIR = '/nas/pretrain-bert/pretrain-tensorflow/uncased_L-12_H-768_A-12/'
config_file = os.path.join(BERT_DIR, CONFIG_NAME)
config = BertConfig.from_json_file(config_file)

model = BertForPreTraining.from_pretrained(BERT_DIR)
model.eval()


class Args:
    def __init__(self):
        pass


args = Args()
args.no_cuda = False

device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
model.to(device)
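# Quick sanity check (an assumption, not part of the original script): predict a masked
# token with the BertForPreTraining head loaded above. The sentence is illustrative.
tokens = ['[CLS]'] + tokenizer.tokenize('the doctor prescribed a new') + ['[MASK]', '.', '[SEP]']
mask_index = tokens.index('[MASK]')
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)
with torch.no_grad():
    # Without labels, the model returns (masked-LM scores, next-sentence scores)
    prediction_scores, _ = model(input_ids)
predicted_id = prediction_scores[0, mask_index].argmax().item()
print(tokenizer.convert_ids_to_tokens([predicted_id]))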
akerke_tagged_complaints_path = 'Russian/Alem_Tagged_Complaints/akerke_tagged/pickle/dataset.pkl'

ru_bert_path = '/home/alem/Alem_Sagandykov_Documents/Alem_Social/HERMES/'
vocab_path = 'Production/vocab.txt'
config_path = 'Production/bert_config.json'
ru_bert_pytorch_weights_path_pth = 'pytorch_dump/deeppavlov_pretrained_rubert.pth'
ru_bert_pytorch_weights_path_bin = 'pytorch_dump/rubert_cased_L-12_H-768_A-12_pt/pytorch_model.bin'

vocab = load_vocab(os.path.join(ru_bert_path, vocab_path))

tags2index = {
    'X': 0,
    'O': 1,
    'B-ORG': 2,
    'I-ORG': 3,
    'B-PER': 4,
    'I-PER': 5,
    'B-LOC': 6,
    'I-LOC': 7
}
index2tags = dict(zip(tags2index.values(), tags2index.keys()))

config = BertConfig(os.path.join(ru_bert_path, config_path))
bert_for_pretraining = BertForPreTraining(config)

ru_bert_weights = torch.load(os.path.join(ru_bert_path, ru_bert_pytorch_weights_path_bin))
bert_for_pretraining.load_state_dict(ru_bert_weights)

bert_layer = bert_for_pretraining.bert
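# Sketch of how the extracted encoder could feed a tagging head (an assumption, not part
# of the original setup): one linear layer over the final hidden states, one logit per
# tag in tags2index. hidden_size=768 matches rubert_cased_L-12_H-768_A-12.
import torch.nn as nn


class RuBertTagger(nn.Module):
    def __init__(self, bert, num_tags=len(tags2index), hidden_size=768):
        super(RuBertTagger, self).__init__()
        self.bert = bert
        self.classifier = nn.Linear(hidden_size, num_tags)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        # BertModel returns (encoded_layers, pooled_output); keep only the last layer
        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                       output_all_encoded_layers=False)
        return self.classifier(sequence_output)


tagger = RuBertTagger(bert_layer)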
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size',
                        default=1,
                        type=int,
                        help='Batch size for inference')
    parser.add_argument(
        '--bert_model',
        default='bert-base-cased',
        type=str,
        help='Bert pre-trained model selected, e.g. bert-base-uncased, '
        'bert-large-uncased, bert-base-multilingual-cased, bert-base-chinese')
    parser.add_argument(
        '--max_seq_length',
        default=128,
        type=int,
        help='Maximum total input sequence length after tokenization')
    args = parser.parse_args()

    input_ids = torch.zeros([args.batch_size, args.max_seq_length], dtype=torch.long)
    token_type_ids = torch.zeros([args.batch_size, args.max_seq_length], dtype=torch.long)

    # Export various BERT models
    # Note: For the argument definitions used here see modeling.py from the
    # pytorch-pretrained-bert repository
    #
    # Fully trained models
    model = BertModel.from_pretrained(args.bert_model)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForMaskedLM.from_pretrained(args.bert_model)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_maskedlm_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForNextSentencePrediction.from_pretrained(args.bert_model)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_nextsentence_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForPreTraining.from_pretrained(args.bert_model)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_pretraining_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    # Partially trained models
    model = BertForSequenceClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_classify_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    model = BertForTokenClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_tokenclassify_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    # Returns an error on ONNX export ("squeeze with negative axis -1 might cause the
    # onnx model to be incorrect"), so commented out.
    #
    # model = BertForQuestionAnswering.from_pretrained(args.bert_model)
    # torch.onnx.export(model, (input_ids, token_type_ids),
    #                   'bert_question_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    choices = 2
    input_ids = torch.zeros([args.batch_size, choices, args.max_seq_length], dtype=torch.long)
    token_type_ids = torch.zeros([args.batch_size, choices, args.max_seq_length], dtype=torch.long)

    model = BertForMultipleChoice.from_pretrained(args.bert_model, choices)
    torch.onnx.export(model, (input_ids, token_type_ids),
                      'bert_multiplechoice_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')
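# Optional sanity check (an assumption, not part of the original script): load one of the
# exported graphs with onnxruntime and run it on zero-valued inputs of the same shape.
import numpy as np
import onnxruntime as ort


def check_onnx_export(path, batch_size=1, max_seq_length=128):
    session = ort.InferenceSession(path)
    # torch.onnx.export was called without explicit input_names, so read the
    # auto-generated names back from the graph and feed every input with zeros
    feed = {
        graph_input.name: np.zeros([batch_size, max_seq_length], dtype=np.int64)
        for graph_input in session.get_inputs()
    }
    for output in session.run(None, feed):
        print(output.shape)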
init_vars = list(filter(lambda x: all(e not in x[0] for e in excluded), init_vars))

names = []
arrays = []
for name, shape in init_vars:
    print("Loading TF weight {} with shape {}".format(name, shape))
    array = tf.train.load_variable(tf_path, name)
    names.append(name)
    arrays.append(array)

# Initialise PyTorch model
config = BertConfig.from_json_file('weights/pubmed_pmc_470k/bert_config.json')
print("Building PyTorch model from configuration: {}".format(str(config)))
model = BertForPreTraining(config)

for name, array in zip(names, arrays):
    name = name.split('/')
    # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
    # which are not required for using the pretrained model
    if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
        print("Skipping {}".format("/".join(name)))
        continue
    pointer = model
    for m_name in name:
        if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
            l = re.split(r'_(\d+)', m_name)
        else:
            l = [m_name]