Example #1
import torch

from transformers import (
    ImageGPTConfig,
    ImageGPTForCausalImageModeling,
    load_tf_weights_in_imagegpt,
)
from transformers.utils import CONFIG_NAME, WEIGHTS_NAME


def convert_imagegpt_checkpoint_to_pytorch(imagegpt_checkpoint_path, model_size, pytorch_dump_folder_path):
    # Construct configuration depending on size
    MODELS = {"small": (512, 8, 24), "medium": (1024, 8, 36), "large": (1536, 16, 48)}
    n_embd, n_head, n_layer = MODELS[model_size]  # set model hyperparameters
    config = ImageGPTConfig(n_embd=n_embd, n_layer=n_layer, n_head=n_head)
    model = ImageGPTForCausalImageModeling(config)  # ImageGPT decoder with the causal image modeling head

    # Load weights from numpy
    load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path)

    # Save pytorch-model
    pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
    pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
    print(f"Save PyTorch model to {pytorch_weights_dump_path}")
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print(f"Save configuration file to {pytorch_config_dump_path}")
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
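
A minimal command-line wrapper around this conversion function could look like the sketch below; the flag names are illustrative assumptions rather than taken from the snippet above.

# Sketch of a CLI entry point for the conversion function above.
# The flag names are illustrative assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--imagegpt_checkpoint_path", type=str, required=True, help="Path to the TensorFlow checkpoint to convert.")
    parser.add_argument("--model_size", type=str, required=True, choices=["small", "medium", "large"], help="Size of the checkpoint.")
    parser.add_argument("--pytorch_dump_folder_path", type=str, required=True, help="Folder where the PyTorch weights and config are written.")
    args = parser.parse_args()
    convert_imagegpt_checkpoint_to_pytorch(args.imagegpt_checkpoint_path, args.model_size, args.pytorch_dump_folder_path)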

def get_config(
    self,
    gradient_checkpointing=False,
    scale_attn_by_inverse_layer_idx=False,
    reorder_and_upcast_attn=False,
):
    # Build an ImageGPTConfig from the tester's hyperparameters
    return ImageGPTConfig(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        n_inner=self.intermediate_size,
        activation_function=self.hidden_act,
        resid_pdrop=self.hidden_dropout_prob,
        attn_pdrop=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        initializer_range=self.initializer_range,
        use_cache=True,
        gradient_checkpointing=gradient_checkpointing,
        scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
        reorder_and_upcast_attn=reorder_and_upcast_attn,
    )

def get_large_model_config(self):
    # Load the configuration of a released ImageGPT checkpoint
    return ImageGPTConfig.from_pretrained("imagegpt")
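
These two helpers read their hyperparameters off a model-tester object that is not shown here. A minimal, self-contained sketch of the same idea follows; the hyperparameter values are small illustrative assumptions, not taken from the snippet.

# Sketch: building a tiny ImageGPT config and model, as a test helper would.
# All hyperparameter values here are illustrative assumptions.
import torch

from transformers import ImageGPTConfig, ImageGPTModel

config = ImageGPTConfig(vocab_size=99, n_embd=32, n_layer=2, n_head=4, n_positions=512)
model = ImageGPTModel(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (1, 10))  # batch of 1, sequence of 10 pixel-cluster ids
with torch.no_grad():
    outputs = model(input_ids)
print(outputs.last_hidden_state.shape)  # torch.Size([1, 10, 32])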