def get_model(neox_args, inference=False, get_key_value=True):
    """Build the model."""

    print_rank_0('building GPT2 model ...')

    # Build model on cpu.
    model = GPT2ModelPipe(neox_args=neox_args,
                          num_tokentypes=0,
                          parallel_output=True,
                          topology=mpu.get_topology(),
                          inference=inference,
                          get_key_value=get_key_value)

    if not neox_args.is_pipe_parallel:
        # Export PipeParallel model to nn.Sequential model to avoid the overhead of deepspeed's pipe parallel training
        model = model.to_sequential()
    else:
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = partial(get_batch_pipe, neox_args=neox_args)

    if neox_args.deepspeed:
        # DeepSpeed handles CUDA, FP16, and DDP components.
        return model
    else:
        raise ValueError("Must be using deepspeed to run neox")
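The `_megatron_batch_fn` attribute set above is only a stash; as the comment notes, the real hookup happens after `deepspeed.initialize`, when the returned engine is given the batch function. A minimal sketch of that call site, assuming the optimizer and lr_scheduler are built elsewhere (everything except `set_batch_fn` and `_megatron_batch_fn` is illustrative, not taken from the source):

# Sketch: wiring the stashed batch fn into DeepSpeed's pipeline engine.
model = get_model(neox_args=neox_args)
model_engine, optimizer, _, lr_scheduler = deepspeed.initialize(
    args=neox_args,
    model=model,
    optimizer=optimizer,        # assumed to be constructed beforehand
    lr_scheduler=lr_scheduler,  # assumed to be constructed beforehand
)
if neox_args.is_pipe_parallel:
    # The pipeline engine pulls batches through a batch fn; hand it the partial
    # stored on the underlying model.
    model_engine.set_batch_fn(model_engine.module._megatron_batch_fn)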
def model_provider():
    """Build the model."""

    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0,
                              parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    return model
def model_provider():
    """Build the model."""

    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0,
                              parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    ## Wandb
    use_wandb = get_wandb_api_key() is not None
    set_use_wandb(use_wandb)
    args_dict = vars(args)
    if use_wandb:
        # only display system stats from one worker per machine
        wandb_settings = wandb.Settings() if is_local_main() else wandb.Settings(_disable_stats=True)
        group_name = args_dict.get('wandb_group')
        name = f'{socket.gethostname()}-{local_rank()}' if group_name else None
        try:
            wandb.init(project="neox",
                       group=group_name,
                       name=name,
                       save_code=False,
                       force=False,
                       entity=args_dict.get('wandb_team'),
                       settings=wandb_settings)
        except UsageError as e:
            set_use_wandb(False)
            print(e)
            print('Skipping wandb. Execute `wandb login` on local or main node machine to enable.')

    if use_wandb:
        wandb.config.update(args_dict)

    return model
def get_model(neox_args, use_cache=False):
    """Build the model."""

    print_rank_0("building GPT2 model ...")

    # Build model on cpu.
    model = GPT2ModelPipe(
        neox_args=neox_args,
        num_tokentypes=0,
        parallel_output=True,
        topology=mpu.get_topology(),
        use_cache=use_cache,
    )

    ### soft prompt tuning stuff ###
    if neox_args.soft_prompt_tuning is not None and neox_args.soft_prompt_tuning.get(
        "enabled", False
    ):
        soft_prompt = SoftEmbedding(
            neox_args,
            wte=getattr(model, "0").word_embeddings,
            n_tokens=neox_args.soft_prompt_tuning.get("n_tokens", 10),
            init_string=neox_args.soft_prompt_tuning.get("init_string", ""),
            init_range=neox_args.soft_prompt_tuning.get("init_range", 0.5),
        )
        model.insert_layers(
            layers=soft_prompt, idx=1
        )  # insert the soft prompt layer directly after the word embeddings

        # freeze everything but the soft prompt
        for name, param in model.named_parameters():
            if not "soft_embedding" in name:
                param.requires_grad = False

    if not neox_args.is_pipe_parallel:
        # Export PipeParallel model to nn.Sequential model to avoid the overhead of deepspeed's pipe parallel training
        model = model.to_sequential()

    if neox_args.deepspeed:
        # DeepSpeed handles CUDA, FP16, and DDP components.
        return model
    else:
        raise ValueError("Must be using deepspeed to run neox")
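When soft prompt tuning is enabled, the freeze loop above leaves only the `SoftEmbedding` parameters with `requires_grad=True`, so the optimizer should only ever see those weights. A small illustrative check of that invariant (the `trainable_parameters` helper is hypothetical, not part of the source):

# Sketch: confirm that only the soft prompt remains trainable after get_model.
def trainable_parameters(model):
    return [p for p in model.parameters() if p.requires_grad]

model = get_model(neox_args=neox_args)
params = trainable_parameters(model)
# With soft_prompt_tuning enabled, `params` contains only the SoftEmbedding
# weights; every other GPT2ModelPipe parameter was frozen above.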