def convert_imagegpt_checkpoint_to_pytorch(imagegpt_checkpoint_path, model_size, pytorch_dump_folder_path): # Construct configuration depending on size MODELS = {"small": (512, 8, 24), "medium": (1024, 8, 36), "large": (1536, 16, 48)} n_embd, n_head, n_layer = MODELS[model_size] # set model hyperparameters config = ImageGPTConfig(n_embd=n_embd, n_layer=n_layer, n_head=n_head) model = ImageGPTForCausalLM(config) # Load weights from numpy load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path) # Save pytorch-model pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME print(f"Save PyTorch model to {pytorch_weights_dump_path}") torch.save(model.state_dict(), pytorch_weights_dump_path) print(f"Save configuration file to {pytorch_config_dump_path}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(config.to_json_string())
def get_config(self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False): return ImageGPTConfig( vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, n_inner=self.intermediate_size, activation_function=self.hidden_act, resid_pdrop=self.hidden_dropout_prob, attn_pdrop=self.attention_probs_dropout_prob, n_positions=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, initializer_range=self.initializer_range, use_cache=True, gradient_checkpointing=gradient_checkpointing, scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx, reorder_and_upcast_attn=reorder_and_upcast_attn, )
def get_large_model_config(self): return ImageGPTConfig.from_pretrained("imagegpt")