def __init__(self, config, num_labels):
    """Build a GPT-2 backbone with a small 1-D convolutional classification head.

    Args:
        config: GPT2Config used to construct the underlying ``GPT2Model``.
            Its ``n_embd`` attribute (768 for the base GPT-2 config) sets the
            input channel count of the first convolution.
        num_labels: Number of output labels; becomes the channel count of the
            final convolution.
    """
    super().__init__(config)
    self.transformer = GPT2Model(config)
    # Use the hidden size from the config instead of hard-coding 768 so the
    # head works with any GPT-2 variant (behavior is unchanged for the
    # default config, where n_embd == 768).
    hidden_size = getattr(config, "n_embd", 768)
    self.cnn1 = nn.Conv1d(hidden_size, 256, kernel_size=3, padding=1)
    self.cnn2 = nn.Conv1d(256, num_labels, kernel_size=3, padding=1)
    # Initialize all submodule weights with the pretrained-model convention.
    self.apply(self.init_weights)
def gpt2Model(*args, **kwargs):
    """torch.hub entry point for the base OpenAI GPT-2 Transformer.

    The model is a stack of identical masked self-attention blocks,
    pre-trained on a large-scale corpus with a language-modeling objective.
    All positional and keyword arguments are forwarded verbatim to
    ``GPT2Model.from_pretrained``.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')

        # Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> indexed_tokens_1 = tokenizer.encode(text_1)
        >>> indexed_tokens_2 = tokenizer.encode(text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load gpt2Model
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2')
        >>> model.eval()

        # Predict hidden states features for each layer
        # past can be used to reuse precomputed hidden state in subsequent predictions
        >>> with torch.no_grad():
                hidden_states_1, past = model(tokens_tensor_1)
                hidden_states_2, past = model(tokens_tensor_2, past=past)
    """
    # Delegate directly; no local state is needed here.
    return GPT2Model.from_pretrained(*args, **kwargs)
def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, pytorch_dump_folder_path):
    """Convert a TensorFlow GPT-2 checkpoint into PyTorch weight + config files.

    Args:
        gpt2_checkpoint_path: Path to the TensorFlow checkpoint to convert.
        gpt2_config_file: Path to a GPT-2 JSON config file; an empty string
            means "use the default GPT2Config".
        pytorch_dump_folder_path: Directory that will receive the PyTorch
            weights file (``WEIGHTS_NAME``) and config file (``CONFIG_NAME``).
    """
    import os  # stdlib; local import keeps this edit self-contained

    # Construct the model, either from a provided config file or defaults.
    if gpt2_config_file == "":
        config = GPT2Config()
    else:
        config = GPT2Config(gpt2_config_file)
    model = GPT2Model(config)

    # Load weights from the TensorFlow checkpoint (via numpy).
    load_tf_weights_in_gpt2(model, gpt2_checkpoint_path)

    # Build output paths with os.path.join instead of string concatenation
    # so separators are handled portably.
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)

    # Save the pytorch model state dict and its JSON configuration.
    print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print("Save configuration file to {}".format(pytorch_config_dump_path))
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())