def get_model(neox_args, inference=False, get_key_value=True):
    """Build the model."""

    print_rank_0('building GPT2 model ...')

    # Build model on cpu.
    model = GPT2ModelPipe(neox_args=neox_args,
                          num_tokentypes=0,
                          parallel_output=True,
                          topology=mpu.get_topology(),
                          inference=inference,
                          get_key_value=get_key_value)

    if not neox_args.is_pipe_parallel:
        # Export PipeParallel model to nn.Sequential model to avoid the overhead of deepspeed's pipe parallel training
        model = model.to_sequential()
    else:
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = partial(get_batch_pipe, neox_args=neox_args)

    if neox_args.deepspeed:
        # DeepSpeed handles CUDA, FP16, and DDP components.
        return model
    else:
        raise ValueError("Must be using deepspeed to run neox")
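The `_megatron_batch_fn` attribute set above is only a stash; as the comment notes, the real hookup happens after `deepspeed.initialize`, when the returned engine is given the batch function. A minimal sketch of that call site, assuming the optimizer and lr_scheduler are built elsewhere (everything except `set_batch_fn` and `_megatron_batch_fn` is illustrative, not taken from the source):

# Sketch: wiring the stashed batch fn into DeepSpeed's pipeline engine.
model = get_model(neox_args=neox_args)
model_engine, optimizer, _, lr_scheduler = deepspeed.initialize(
    args=neox_args,
    model=model,
    optimizer=optimizer,        # assumed to be constructed beforehand
    lr_scheduler=lr_scheduler,  # assumed to be constructed beforehand
)
if neox_args.is_pipe_parallel:
    # The pipeline engine pulls batches through a batch fn; hand it the partial
    # stored on the underlying model.
    model_engine.set_batch_fn(model_engine.module._megatron_batch_fn)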
def model_provider():
    """Build the model."""

    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0,
                              parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    return model
def model_provider():
    """Build the model."""

    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0,
                              parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    ## Wandb
    use_wandb = get_wandb_api_key() is not None
    set_use_wandb(use_wandb)
    args_dict = vars(args)
    if use_wandb:
        # only display system stats from one worker per machine
        wandb_settings = wandb.Settings() if is_local_main() else wandb.Settings(_disable_stats=True)
        group_name = args_dict.get('wandb_group')
        name = f'{socket.gethostname()}-{local_rank()}' if group_name else None
        try:
            wandb.init(project="neox",
                       group=group_name,
                       name=name,
                       save_code=False,
                       force=False,
                       entity=args_dict.get('wandb_team'),
                       settings=wandb_settings)
        except UsageError as e:
            set_use_wandb(False)
            print(e)
            print('Skipping wandb. Execute `wandb login` on local or main node machine to enable.')

    if use_wandb:
        wandb.config.update(args_dict)

    return model
def get_model(neox_args, use_cache=False):
    """Build the model."""

    print_rank_0("building GPT2 model ...")

    # Build model on cpu.
    model = GPT2ModelPipe(
        neox_args=neox_args,
        num_tokentypes=0,
        parallel_output=True,
        topology=mpu.get_topology(),
        use_cache=use_cache,
    )

    ### soft prompt tuning stuff ###
    if neox_args.soft_prompt_tuning is not None and neox_args.soft_prompt_tuning.get(
        "enabled", False
    ):
        soft_prompt = SoftEmbedding(
            neox_args,
            wte=getattr(model, "0").word_embeddings,
            n_tokens=neox_args.soft_prompt_tuning.get("n_tokens", 10),
            init_string=neox_args.soft_prompt_tuning.get("init_string", ""),
            init_range=neox_args.soft_prompt_tuning.get("init_range", 0.5),
        )
        model.insert_layers(
            layers=soft_prompt, idx=1
        )  # insert the soft prompt layer directly after the word embeddings

        # freeze everything but the soft prompt
        for name, param in model.named_parameters():
            if not "soft_embedding" in name:
                param.requires_grad = False

    if not neox_args.is_pipe_parallel:
        # Export PipeParallel model to nn.Sequential model to avoid the overhead of deepspeed's pipe parallel training
        model = model.to_sequential()

    if neox_args.deepspeed:
        # DeepSpeed handles CUDA, FP16, and DDP components.
        return model
    else:
        raise ValueError("Must be using deepspeed to run neox")
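When soft prompt tuning is enabled, the freeze loop above leaves only the `SoftEmbedding` parameters with `requires_grad=True`, so the optimizer should only ever see those weights. A small illustrative check of that invariant (the `trainable_parameters` helper is hypothetical, not part of the source):

# Sketch: confirm that only the soft prompt remains trainable after get_model.
def trainable_parameters(model):
    return [p for p in model.parameters() if p.requires_grad]

model = get_model(neox_args=neox_args)
params = trainable_parameters(model)
# With soft_prompt_tuning enabled, `params` contains only the SoftEmbedding
# weights; every other GPT2ModelPipe parameter was frozen above.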