def test_model_from_pretrained(self):
    cache_dir = "/tmp/pytorch_pretrained_bert_test/"
    for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
def construct_encoder(self):
    model = OpenAIGPTModel.from_pretrained(self.model_name)
    model.cuda()
    model = torch.nn.DataParallel(model)
    model.eval()
    tokenizer = OpenAIGPTTokenizer.from_pretrained(self.model_name)
    print("Model and tokenizer are constructed!")
    return model, tokenizer
def sent_feat(text, feat_type):
    if feat_type == 'w2v':
        import gensim
        import numpy as np
        model = gensim.models.KeyedVectors.load_word2vec_format(
            '/scratch/shared/slow/yangl/w2v/GoogleNews-vectors-negative300.bin',
            binary=True)
        final_feats = []
        for word in text.split(' '):
            if (word != 'a') and (word in model.vocab):
                final_feats.append(model.get_vector(word))
        final_feats = np.asarray(final_feats)
    elif feat_type == 'openai':
        import json
        import torch
        from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel
        import logging
        logging.basicConfig(level=logging.INFO)

        # Load pre-trained model tokenizer (vocabulary)
        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

        # Load pre-trained model (weights)
        model = OpenAIGPTModel.from_pretrained('openai-gpt')
        model.eval()
        model.to('cuda')

        # Tokenized input
        # text = "Who was Jim Henson ? Jim Henson was a puppeteer"
        tokenized_text = tokenizer.tokenize(text)

        # Convert tokens to vocabulary indices
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

        # Convert inputs to PyTorch tensors
        tokens_tensor = torch.tensor([indexed_tokens])

        # If you have a GPU, put everything on cuda
        tokens_tensor = tokens_tensor.to('cuda')

        # Predict hidden states features for each layer
        with torch.no_grad():
            hidden_states = model(tokens_tensor)
        final_feats = hidden_states[0].cpu().numpy()
    else:
        # Fail loudly instead of falling through with final_feats undefined
        raise ValueError('Unrecognised FEAT_TYPE.')
    return final_feats
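# A minimal, hypothetical usage sketch for sent_feat() above (not part of the
# original snippet). For the 'openai' branch it assumes a CUDA device is
# available and returns a (num_tokens, 768) NumPy array of GPT hidden states.
feats = sent_feat("Who was Jim Henson ? Jim Henson was a puppeteer", 'openai')
print(feats.shape)  # (num_tokens, 768)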
def __init__(self, num_labels=2):
    # Initialize parent class
    super().__init__()
    # Assign the number of classes
    self.num_labels = num_labels
    # Create an OpenAIGPTModel with the weights 'openai-gpt' and move it to the device
    self.openai_gpt = OpenAIGPTModel.from_pretrained('openai-gpt').to(device)
    # Create a dropout layer with the parameter defined in the OpenAIGPTConfig class
    self.dropout = nn.Dropout(config.resid_pdrop).to(device)
    # Create a linear layer with parameters from the config class and the number of classes
    self.classifier = nn.Linear(config.n_embd, self.num_labels).to(device)
    # Initialize the weights of the linear classifier layer with Xavier normal values
    # (xavier_normal_ works in place, so no extra .to(device) is needed here)
    nn.init.xavier_normal_(self.classifier.weight)
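# A hedged forward() sketch to accompany the classifier above (not from the
# original snippet). It assumes the pytorch_pretrained_bert OpenAIGPTModel,
# whose forward pass returns the last-layer hidden states of shape
# (batch, seq_len, n_embd).
def forward(self, input_ids):
    hidden_states = self.openai_gpt(input_ids)      # (batch, seq_len, n_embd)
    pooled = hidden_states[:, -1, :]                # last token's hidden state
    logits = self.classifier(self.dropout(pooled))  # (batch, num_labels)
    return logits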
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for inference')
    parser.add_argument('--model_name', default='openai-gpt', type=str,
                        help='Pre-trained model name')
    parser.add_argument('--max_seq_length', default=128, type=int,
                        help='Maximum total input sequence length after tokenization')
    args = parser.parse_args()

    input_ids = torch.zeros([args.batch_size, args.max_seq_length], dtype=torch.long)
    model = OpenAIGPTModel.from_pretrained(args.model_name)
    torch.onnx.export(model, input_ids,
                      'openaigpt_' + 'batch' + str(args.batch_size) + '.onnx')
def fetch_objects():
    bert = BertModel.from_pretrained(
        'bert-base-uncased').embeddings.position_embeddings.weight.data
    gpt = OpenAIGPTModel.from_pretrained('openai-gpt').positions_embed.weight.data
    gpt2 = GPT2Model.from_pretrained('gpt2').wpe.weight.data

    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    gpt_tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

    return {
        'bert': bert,
        'gpt': gpt,
        'gpt2': gpt2
    }, {
        'bert': bert_tokenizer,
        'gpt': gpt_tokenizer,
        'gpt2': gpt2_tokenizer
    }
def __init__(self, model_name, add_dense=True, trainable=False):
    super().__init__()
    self.model_name = model_name
    self.add_dense = add_dense
    self.trainable = trainable

    if self.model_name == 'GPT':
        self.encoder = OpenAIGPTModel.from_pretrained('openai-gpt')
    elif self.model_name == 'GPT-2':
        self.encoder = GPT2Model.from_pretrained('gpt2')
    else:
        raise NotImplementedError(f'{self.model_name} -- No such model')

    if not self.trainable:
        for p in self.encoder.parameters():
            p.requires_grad = False

    if self.add_dense:
        self.dense = nn.Linear(in_features=768, out_features=128)
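# A hedged forward() sketch for the encoder wrapper above (not from the original
# code). In pytorch_pretrained_bert, OpenAIGPTModel returns the hidden states
# directly while GPT2Model returns (hidden_states, presents), so the output is
# unpacked defensively before the optional dense projection.
def forward(self, input_ids):
    out = self.encoder(input_ids)
    hidden = out[0] if isinstance(out, tuple) else out  # (batch, seq_len, 768)
    if self.add_dense:
        hidden = self.dense(hidden)                     # (batch, seq_len, 128)
    return hidden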
def get_GPT_embeddings(vocab, dim):
    _embeddings = np.zeros([len(vocab), dim])
    if "openai-gpt" not in OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
        raise ValueError("Provided OpenAI GPT model is not available.")

    tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
    gpt_model = OpenAIGPTModel.from_pretrained("openai-gpt")

    with torch.no_grad():
        for word in vocab:
            subwords = tokenizer.tokenize(word)
            indexed_tokens = tokenizer.convert_tokens_to_ids(subwords)
            tokens_tensor = torch.tensor([indexed_tokens])
            tokens_tensor = tokens_tensor.to(flair.device)
            hidden_states = gpt_model(tokens_tensor)

            # Concatenate the first and last subword embeddings of the word
            first_embedding = hidden_states[0][0]
            last_embedding = hidden_states[0][len(hidden_states[0]) - 1]
            final_embedding = torch.cat([first_embedding, last_embedding])
            _embeddings[vocab[word]] = final_embedding

    return _embeddings
def _init_model_tokenizer(self):
    from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel
    self._tokenizer = OpenAIGPTTokenizer.from_pretrained(self.model_dir)
    self._model = OpenAIGPTModel.from_pretrained(self.model_dir)
    self._model.eval()
parser.add_argument(
    "testset",
    help="Testing input sentences you want to embed plus labels")
args = parser.parse_args()

# Set up logging
import logging
logging.basicConfig(filename="gpt.log", format="%(message)s", level=logging.INFO)

# Load pre-trained model tokenizer (vocabulary)
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

# Load pre-trained model (weights)
model = OpenAIGPTModel.from_pretrained('openai-gpt')
model.to('cuda')
model.eval()

NUM_TRAIN = 4000
TOKENS = [0, 1, 2, 3, 4]
messages = []
labels = []
probes = []

# Get training and testing data
with open(args.trainset, 'r') as data:
    csv_reader = csv.reader(data)
    for row in csv_reader:
        messages.append(row[0])
        probes.append(row[1])
def __init__(self, n_layers, in_size, out_size, embed_size, dropout=0.5,
             initialEmbW=None, rnn_type='lstm', attention=None, q_size=-1,
             embedding_init=None, weights_init=None,
             elmo_init=False, elmo_num_outputs=1, finetune_elmo=False,
             bert_init=False, bert_model=None, finetune_bert=False,
             add_word_emb=True):
    """Initialize encoder with structure parameters

    Args:
        n_layers (int): Number of layers.
        in_size (int): Dimensionality of input vectors.
        out_size (int): Dimensionality of hidden vectors to be output.
        embed_size (int): Dimensionality of word embedding.
        dropout (float): Dropout ratio.
    """
    # TODO
    conv_out_size = 512

    super(LSTMEncoder, self).__init__()
    self.embed = nn.Embedding(in_size, embed_size)
    if embedding_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
    elif weights_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(weights_init['embed']))

    self.elmo_init = elmo_init
    self.bert_init = bert_init
    self.bert_model = bert_model
    self.add_word_emb = add_word_emb
    if elmo_init:
        options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, elmo_num_outputs,
                         requires_grad=finetune_elmo)
        elmo_layer = [nn.Linear(elmo_num_outputs * 1024, out_size), nn.ReLU()]
        self.elmo_layer = nn.Sequential(*elmo_layer)
    elif bert_init:
        if 'bert' in bert_model:
            self.bert = BertModel.from_pretrained(bert_model)
        elif 'openai-gpt' in bert_model:
            self.bert = OpenAIGPTModel.from_pretrained(bert_model)
        elif 'gpt2' in bert_model:
            self.bert = GPT2Model.from_pretrained(bert_model)
        elif 'transfo-xl' in bert_model:
            self.bert = TransfoXLModel.from_pretrained(bert_model)
        self.finetune_bert = finetune_bert
        if not finetune_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
            bert_in = 768
        elif bert_model in ['bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103']:
            bert_in = 1024
        bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
        self.bert_layer = nn.Sequential(*bert_layer)

    if rnn_type == 'lstm':
        self.lstm = nn.LSTM(embed_size, out_size, n_layers,
                            batch_first=True, dropout=dropout)
    elif rnn_type == 'gru':
        self.lstm = nn.GRU(embed_size, out_size, n_layers,
                           batch_first=True, dropout=dropout)

    self.attention = attention
    if attention == 'conv' or attention == 'conv_sum':
        conv_in_size = out_size
        self.conv1 = nn.Conv1d(in_channels=conv_in_size,
                               out_channels=conv_out_size,
                               kernel_size=1, padding=0)
        self.conv2 = nn.Conv1d(in_channels=conv_out_size,
                               out_channels=2,
                               kernel_size=1, padding=0)
        if weights_init is not None:
            self.conv1.weight.data.copy_(torch.from_numpy(weights_init['conv1']))
            self.conv2.weight.data.copy_(torch.from_numpy(weights_init['conv2']))
    elif attention == 'c_conv_sum':
        hidden_size = 512
        conv_hidden_size = 256
        layers = [weight_norm(nn.Linear(out_size, hidden_size), dim=None),
                  nn.ReLU()]
        self.c_fa = nn.Sequential(*layers)
        layers = [weight_norm(nn.Linear(q_size, hidden_size), dim=None),
                  nn.ReLU()]
        self.q_fa = nn.Sequential(*layers)
        layers = [nn.Conv2d(in_channels=hidden_size,
                            out_channels=conv_hidden_size,
                            kernel_size=1),
                  nn.ReLU(),
                  nn.Conv2d(in_channels=conv_hidden_size,
                            out_channels=1,
                            kernel_size=1)]
        self.cq_att = nn.Sequential(*layers)
        if weights_init is not None:
            self.c_fa[0].weight.data.copy_(
                torch.from_numpy(weights_init['c_fa']))
            self.q_fa[0].weight.data.copy_(
                torch.from_numpy(weights_init['q_fa']))
            self.cq_att[0].weight.data.copy_(
                torch.from_numpy(weights_init['cq_att_conv1']))
            self.cq_att[2].weight.data.copy_(
                torch.from_numpy(weights_init['cq_att_conv2']))
def __init__(self):
    super(GPTModel, self).__init__()
    self.mdl = OpenAIGPTModel.from_pretrained('openai-gpt')
    for param in self.mdl.parameters():
        param.requires_grad = False
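# A hedged companion sketch (not part of the original snippet): with all GPT
# weights frozen above, a forward pass would simply expose the pretrained
# hidden states returned by OpenAIGPTModel.
def forward(self, input_ids):
    with torch.no_grad():
        hidden_states = self.mdl(input_ids)  # (batch, seq_len, 768)
    return hidden_states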
def __init__(self, n_layers, in_size, out_size, embed_size, in_size_hier,
             hidden_size, proj_size, dropout=0.5, initialEmbW=None,
             independent=False, rnn_type='lstm', classifier='baseline',
             states_att=False, state_size=-1,
             embedding_init=None, weights_init=None,
             elmo_init=False, elmo_num_outputs=1, finetune_elmo=False,
             bert_init=False, bert_model=None, finetune_bert=False,
             add_word_emb=True, pretrained_all=True):
    """Initialize encoder with structure parameters

    Args:
        n_layers (int): Number of layers.
        in_size (int): Dimensionality of input vectors.
        out_size (int): Dimensionality of output vectors.
        embed_size (int): Dimensionality of word embedding.
        hidden_size (int): Dimensionality of hidden vectors.
        proj_size (int): Dimensionality of projection before softmax.
        dropout (float): Dropout ratio.
    """
    # TODO
    att_size = 128
    self.rnn_type = rnn_type
    self.classifier = classifier

    super(HLSTMDecoder, self).__init__()
    self.embed = nn.Embedding(in_size, embed_size)
    if embedding_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
    elif weights_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(weights_init['embed']))

    if rnn_type == 'lstm':
        self.lstm = nn.LSTM(embed_size + in_size_hier, hidden_size, n_layers,
                            batch_first=True, dropout=dropout)
    elif rnn_type == 'gru':
        self.lstm = nn.GRU(embed_size + in_size_hier, hidden_size, n_layers,
                           batch_first=True, dropout=dropout)
    if weights_init is not None:
        lstm_wt = weights_init['lstm']
        for k, v in lstm_wt.items():
            self.lstm.__getattr__(k).data.copy_(torch.from_numpy(v))

    self.elmo_init = elmo_init
    self.bert_init = bert_init
    self.pretrained_all = pretrained_all
    self.bert_model = bert_model
    self.add_word_emb = add_word_emb
    if False:  # if pretrained_all and elmo_init:
        options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, elmo_num_outputs,
                         requires_grad=finetune_elmo)
        elmo_layer = [nn.Linear(elmo_num_outputs * 1024, out_size), nn.ReLU()]
        self.elmo_layer = nn.Sequential(*elmo_layer)
    elif False:  # elif pretrained_all and bert_init:
        if 'bert' in bert_model:
            self.bert = BertModel.from_pretrained(bert_model)
        elif 'openai-gpt' in bert_model:
            self.bert = OpenAIGPTModel.from_pretrained(bert_model)
        elif 'gpt2' in bert_model:
            self.bert = GPT2Model.from_pretrained(bert_model)
        elif 'transfo-xl' in bert_model:
            self.bert = TransfoXLModel.from_pretrained(bert_model)
        self.finetune_bert = finetune_bert
        if not finetune_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
            bert_in = 768
        elif bert_model in ['bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103']:
            bert_in = 1024
        bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
        self.bert_layer = nn.Sequential(*bert_layer)

    self.n_layers = n_layers
    self.dropout = dropout
    self.independent = independent
    self.states_att = states_att
    if states_att:
        self.ecW = nn.Linear(state_size, att_size)
        self.ysW = nn.Linear(hidden_size, att_size)
        hidden_size += state_size

    if classifier == 'baseline':
        layers = [nn.Linear(hidden_size, proj_size),
                  nn.Linear(proj_size, out_size)]
        self.y_classifier = nn.Sequential(*layers)
    elif classifier == 'weighted_norm':
        layers = [weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                  nn.ReLU(),
                  weight_norm(nn.Linear(proj_size, out_size), dim=None)]
        self.y_classifier = nn.Sequential(*layers)
    elif classifier == 'logit':
        layers = [weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                  nn.ReLU(),
                  nn.Linear(proj_size, out_size)]
        self.classifier_txt = nn.Sequential(*layers)
        layers = [weight_norm(nn.Linear(hidden_size, 2048), dim=None),
                  nn.ReLU(),
                  nn.Linear(2048, out_size)]
        self.classifier_ft = nn.Sequential(*layers)
        if weights_init is not None:
            self.classifier_txt[0].weight.data.copy_(
                torch.from_numpy(weights_init['classifier_txt']))
            self.classifier_ft[0].weight.data.copy_(
                torch.from_numpy(weights_init['classifier_ft']))
def __init__(self, n_wlayers, n_slayers, in_size, out_size, embed_size,
             hidden_size, dropout=0.5, ignore_label=None, initialEmbW=None,
             independent=False, rnn_type='lstm',
             embedding_init=None, weights_init=None,
             elmo_init=False, elmo_num_outputs=1, finetune_elmo=False,
             bert_init=False, bert_model=None, finetune_bert=False,
             add_word_emb=True, pretrained_all=True, concat_his=False):
    """Initialize encoder with structure parameters

    Args:
        n_layers (int): Number of layers.
        in_size (int): Dimensionality of input vectors.
        out_size (int): Dimensionality of hidden vectors to be output.
        embed_size (int): Dimensionality of word embedding.
        dropout (float): Dropout ratio.
    """
    super(HLSTMEncoder, self).__init__()
    self.embed = nn.Embedding(in_size, embed_size)
    if embedding_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
    elif weights_init is not None:
        self.embed.weight.data.copy_(torch.from_numpy(weights_init['embed']))

    if rnn_type == 'lstm':
        self.wlstm = nn.LSTM(embed_size, hidden_size, n_wlayers,
                             batch_first=True, dropout=dropout)
        self.slstm = nn.LSTM(hidden_size, out_size, n_slayers,
                             batch_first=True, dropout=dropout)
    elif rnn_type == 'gru':
        self.wlstm = nn.GRU(embed_size, hidden_size, n_wlayers,
                            batch_first=True, dropout=dropout)
        self.slstm = nn.GRU(hidden_size, out_size, n_slayers,
                            batch_first=True, dropout=dropout)

    self.elmo_init = elmo_init
    self.bert_init = bert_init
    self.pretrained_all = pretrained_all
    self.concat_his = concat_his
    self.bert_model = bert_model
    self.add_word_emb = add_word_emb
    if pretrained_all and elmo_init:
        options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, elmo_num_outputs,
                         requires_grad=finetune_elmo)
        elmo_layer = [nn.Linear(elmo_num_outputs * 1024, out_size), nn.ReLU()]
        self.elmo_layer = nn.Sequential(*elmo_layer)
    elif pretrained_all and bert_init:
        if 'bert' in bert_model:
            self.bert = BertModel.from_pretrained(bert_model)
        elif 'openai-gpt' in bert_model:
            self.bert = OpenAIGPTModel.from_pretrained(bert_model)
        elif 'gpt2' in bert_model:
            self.bert = GPT2Model.from_pretrained(bert_model)
        elif 'transfo-xl' in bert_model:
            self.bert = TransfoXLModel.from_pretrained(bert_model)
        self.finetune_bert = finetune_bert
        if not finetune_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
            bert_in = 768
        elif bert_model in ['bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103']:
            bert_in = 1024
        bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
        self.bert_layer = nn.Sequential(*bert_layer)

    self.independent = independent
    self.rnn_type = rnn_type