def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    trainer = None
    if cfg.trainer.precision == 16:
        trainer = Trainer(
            plugins=[
                NLPDDPPlugin(),
                NLPNativeMixedPrecisionPlugin(
                    init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                    growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                ),
            ],
            **cfg.trainer,
        )
    elif cfg.trainer.precision == 'bf16':
        trainer = Trainer(plugins=[NLPDDPPlugin(), NLPNativeBfloat16PrecisionPlugin()], **cfg.trainer)
    else:
        trainer = Trainer(plugins=[NLPDDPPlugin(), NLPPrecisionPlugin()], **cfg.trainer)

    app_state = AppState()
    app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
    app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(
        cfg.restore_from_path, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector(),
    )

    # Note: most NeMo models must have the data paths configured before instantiating the model.
    # MegatronGPTModel sets up the data in the PTL method .setup, which happens after DDP spawns.
    model.cfg.data.splits_string = cfg.model.data.splits_string

    trainer.test(model)
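# A minimal sketch (an assumption about NeMo's helper, not a verbatim copy) of the
# rank arithmetic compute_model_parallel_rank performs above: within one node, a
# GPU's position inside its model-parallel group is the local rank modulo the
# group size.
def _model_parallel_rank_sketch(local_rank: int, model_parallel_size: int) -> int:
    return local_rank % model_parallel_size

assert _model_parallel_rank_sketch(5, 4) == 1  # GPU 5 is rank 1 of its group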
def split_partition(model, partitions, tp_size, write_path=None):
    if len(partitions) != 1:
        raise ValueError(
            "Can only split partitions of model with TP=1. For partitions of models with TP>1, merge first."
        )

    if tp_size < 1:
        raise ValueError("TP size must be >= 1.")

    app_state = AppState()
    app_state.data_parallel_rank = 0
    app_state.model_parallel_size = tp_size
    app_state.model_parallel_rank = tp_size - 1

    idx = 0
    splits = []
    for _, param in model.named_parameters():
        if param.shape == partitions[0][idx].shape:
            # Parameter is not tensor parallel: replicate it on every rank.
            split = [partitions[0][idx].data] * tp_size
        elif param.shape[0] == partitions[0][idx].shape[0]:
            # First dim matches: split along the last (column-parallel) dim.
            split = torch.split(partitions[0][idx].data, param.shape[-1], dim=-1)
        else:
            # Otherwise split along the first (row-parallel) dim.
            split = torch.split(partitions[0][idx].data, param.shape[0], dim=0)
        splits.append(split)
        idx += 1

    for i in range(tp_size - 1, -1, -1):
        app_state.model_parallel_rank = i

        idx = 0
        for name, param in model.named_parameters():
            split_val = splits[idx][i]

            if param.shape != split_val.shape:
                logging.info(
                    f"Warning: Shape mismatch for parameter {name} required shape: {param.shape}, split shape: {split_val.shape}. Padding to match required size."
                )

                if split_val.shape[1:] == param.shape[1:]:
                    # Pad the first dim; F.pad reads the pad list last-dim-first.
                    pad = [0, 0] * len(split_val.shape)
                    pad[-1] = param.shape[0] - split_val.shape[0]
                    split_val = torch.nn.functional.pad(split_val, pad, 'constant')
                elif split_val.shape[:-1] == param.shape[:-1]:
                    # Pad the last dim.
                    pad = [0, param.shape[-1] - split_val.shape[-1]]
                    split_val = torch.nn.functional.pad(split_val, pad, 'constant')
                else:
                    raise RuntimeError(
                        f"Can not handle parameter {name}, required shape: {param.shape}, split shape: {split_val.shape}."
                    )

            param.data = split_val
            idx += 1

        if write_path is not None:
            model.save_to(write_path)
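# Toy illustration (not part of the script) of the two tensor-parallel split cases
# and the padding convention used by split_partition above.
import torch
import torch.nn.functional as F

tp_size = 2
full = torch.arange(16.).reshape(4, 4)  # a weight from the TP=1 checkpoint

# Row-parallel style: dim 0 differs, so split along dim 0 into (2, 4) shards.
row_shards = torch.split(full, full.shape[0] // tp_size, dim=0)

# Column-parallel style: dim 0 matches, so split along the last dim into (4, 2) shards.
col_shards = torch.split(full, full.shape[-1] // tp_size, dim=-1)

# F.pad reads the pad list last-dim-first: [left, right] grows the last dim,
# [0, 0, top, bottom] grows dim 0 -- matching the two padding branches above.
assert F.pad(col_shards[0], [0, 1], 'constant').shape == (4, 3)
assert F.pad(row_shards[0], [0, 0, 0, 1], 'constant').shape == (3, 4)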
def main(): parser = ArgumentParser() parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file") parser.add_argument("--prompt", type=str, default="", required=True, help="Prompt for the model (a text to complete)") parser.add_argument("--tokens_to_generate", type=int, default="16", required=False, help="How many tokens to add to prompt") parser.add_argument( "--tensor_model_parallel_size", type=int, default=1, required=True, ) args = parser.parse_args() torch.set_grad_enabled(False) # trainer required for restoring model parallel models trainer = Trainer(plugins=NLPDDPPlugin(), devices=args.tensor_model_parallel_size, precision=16, accelerator='gpu') app_state = AppState() if args.tensor_model_parallel_size > 1: app_state.model_parallel_size = args.tensor_model_parallel_size app_state.model_parallel_rank = compute_model_parallel_rank( trainer.local_rank, app_state.model_parallel_size) model = MegatronT5Model.restore_from(restore_path=args.model_file, trainer=trainer) model.freeze() request = { "prompt": args.prompt, "tokens_to_generate": args.tokens_to_generate, } dataset = T5RequestDataset(request, model.tokenizer) request_dl = DataLoader(dataset) response = trainer.predict(model, request_dl) print("***************************") print(response) print("***************************")
def main(cfg) -> None:
    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), **cfg.trainer)
    assert (
        cfg.trainer.devices * cfg.trainer.num_nodes
        == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
    ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"

    app_state = AppState()
    app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
    (
        app_state.tensor_model_parallel_rank,
        app_state.pipeline_model_parallel_rank,
        app_state.model_parallel_size,
        app_state.data_parallel_size,
        app_state.pipeline_model_parallel_split_rank,
    ) = fake_initialize_model_parallel(
        world_size=app_state.model_parallel_size,
        rank=trainer.global_rank,
        tensor_model_parallel_size_=cfg.tensor_model_parallel_size,
        pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size,
        pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank,
    )

    if cfg.model_file is not None:
        if not os.path.exists(cfg.model_file):
            raise ValueError(f"Model file {cfg.model_file} does not exist")
        model = MegatronNMTModel.restore_from(
            restore_path=cfg.model_file, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector(),
        )
    elif cfg.checkpoint_dir is not None:
        checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name))
        model = MegatronNMTModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer)
    else:
        raise ValueError("need at least a nemo file or checkpoint dir")

    model.freeze()

    logging.info(f"Translating: {cfg.srctext}")
    src_text = []
    translations = []
    with open(cfg.srctext, 'r') as src_f, open(cfg.tgtout, 'w') as tgt_f:
        for line in src_f:
            src_text.append(line.strip())
            if len(src_text) == cfg.batch_size:
                translations = model.translate(
                    text=src_text, source_lang=cfg.source_lang, target_lang=cfg.target_lang,
                )
                for translation in translations:
                    tgt_f.write(translation + "\n")
                src_text = []
        # Flush any remaining lines that did not fill a full batch.
        if len(src_text) > 0:
            translations = model.translate(text=src_text, source_lang=cfg.source_lang, target_lang=cfg.target_lang)
            for translation in translations:
                tgt_f.write(translation + "\n")
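# The read loop above is a plain batch-and-flush pattern; here is the same logic
# in isolation (a sketch, with a hypothetical generator name):
def _batched_lines(path: str, batch_size: int):
    batch = []
    with open(path, 'r') as f:
        for line in f:
            batch.append(line.strip())
            if len(batch) == batch_size:
                yield batch   # full batch: hand it to the model
                batch = []
    if batch:
        yield batch           # flush the short final batch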
def main(cfg) -> None:
    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), **cfg.trainer)
    assert (
        cfg.trainer.devices * cfg.trainer.num_nodes
        == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
    ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"

    # Load prompt tuned model, virtual_prompt_model_file must be provided in config
    if cfg.get('virtual_prompt_model_file', None) is not None:
        # Update frozen GPT model path in case it has changed
        prompt_learning_cfg = MegatronGPTPromptLearningModel.restore_from(
            cfg.virtual_prompt_model_file, trainer=trainer, return_config=True
        )
        with open_dict(prompt_learning_cfg):
            prompt_learning_cfg.language_model_path = cfg.gpt_model_file

        # Now load prompt learning model with frozen gpt model base
        model = MegatronGPTPromptLearningModel.restore_from(
            restore_path=cfg.virtual_prompt_model_file, trainer=trainer, override_config_path=prompt_learning_cfg
        )

    # Or load regular GPT model
    elif cfg.gpt_model_file:
        model = MegatronGPTModel.restore_from(restore_path=cfg.gpt_model_file, trainer=trainer)
    elif cfg.checkpoint_dir:
        app_state = AppState()
        if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1:
            app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
            (
                app_state.tensor_model_parallel_rank,
                app_state.pipeline_model_parallel_rank,
                app_state.model_parallel_size,
                app_state.data_parallel_size,
                app_state.pipeline_model_parallel_split_rank,
            ) = fake_initialize_model_parallel(
                world_size=app_state.model_parallel_size,
                rank=trainer.global_rank,
                tensor_model_parallel_size_=cfg.tensor_model_parallel_size,
                pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size,
                pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank,
            )
        checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name))
        model = MegatronGPTModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer)
    else:
        raise ValueError("need at least a nemo file or checkpoint dir")

    model.freeze()

    # Have to turn off activations_checkpoint_method for inference
    try:
        model.model.language_model.encoder.activations_checkpoint_method = None
    except AttributeError:
        pass

    try:
        model.frozen_model.language_model.encoder.activations_checkpoint_method = None
    except AttributeError:
        pass

    length_params: LengthParam = {
        "max_length": cfg.inference.tokens_to_generate,
        "min_length": cfg.inference.min_tokens_to_generate,
    }

    sampling_params: SamplingParam = {
        "use_greedy": cfg.inference.greedy,
        "temperature": cfg.inference.temperature,
        "top_k": cfg.inference.top_k,
        "top_p": cfg.inference.top_p,
        "repetition_penalty": cfg.inference.repetition_penalty,
        "add_BOS": cfg.inference.add_BOS,
        "all_probs": cfg.inference.all_probs,
        "compute_logprob": cfg.inference.compute_logprob,
    }

    # First method of running text generation, call model.generate method
    response = model.generate(
        inputs=OmegaConf.to_container(cfg.prompts), length_params=length_params, sampling_params=sampling_params
    )

    print("***************************")
    print(response)
    print("***************************")

    # Second method of running text generation, call trainer.predict
    collate_fn = None
    if cfg.get('virtual_prompt_model', False):
        collate_fn = lambda x: list(x)

    ds = RequestDataSet(OmegaConf.to_container(cfg.prompts))
    request_dl = DataLoader(dataset=ds, collate_fn=collate_fn, batch_size=2)
    config = OmegaConf.to_container(cfg.inference)
    model.set_inference_config(config)
    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")

    # Third method of running text generation, use inference server
    if cfg.server:
        if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0:
            server = MegatronServer(model.cuda())
            server.run("0.0.0.0", port=cfg.port)

        while True:
            choice = torch.cuda.LongTensor(1)
            torch.distributed.broadcast(choice, 0)
            if choice[0].item() == 0:
                generate(model.cuda())
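# Rough sketch of what the sampling knobs above conventionally mean for a single
# decoding step (an assumption about semantics, not NeMo's internal implementation;
# top_p, repetition_penalty, etc. are omitted for brevity).
import torch

def _sample_next_token(logits, use_greedy=False, temperature=1.0, top_k=0):
    if use_greedy:
        return torch.argmax(logits, dim=-1)
    logits = logits / temperature
    if top_k > 0:
        # Mask everything below the k-th largest logit.
        kth_value = torch.topk(logits, top_k).values[..., -1, None]
        logits = logits.masked_fill(logits < kth_value, float('-inf'))
    return torch.multinomial(torch.softmax(logits, dim=-1), num_samples=1).squeeze(-1)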
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    plugins = [
        NLPDDPPlugin(
            no_ddp_communication_hook=True,
            gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
            find_unused_parameters=False,
        )
    ]
    if cfg.trainer.precision in [16, 'bf16']:
        scaler = None
        if cfg.trainer.precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        if megatron_amp_o2:
            plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))
        else:
            plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer)

    exp_manager(trainer, cfg.exp_manager)

    app_state = AppState()
    if cfg.model.tensor_model_parallel_size > 1 or cfg.model.pipeline_model_parallel_size > 1:
        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size * cfg.model.pipeline_model_parallel_size
        (
            app_state.tensor_model_parallel_rank,
            app_state.pipeline_model_parallel_rank,
            app_state.model_parallel_size,
            _,
        ) = fake_initialize_model_parallel(
            world_size=app_state.model_parallel_size,
            rank=trainer.global_rank,
            tensor_model_parallel_size_=cfg.model.tensor_model_parallel_size,
            pipeline_model_parallel_size_=cfg.model.pipeline_model_parallel_size,
        )

    # Override timer callback to a stateless one
    for idx, callback in enumerate(trainer.callbacks):
        if isinstance(callback, Timer):
            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)

    # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
    with open_dict(cfg):
        cfg.model.precision = cfg.trainer.precision

    model = MegatronGPTModel.restore_from(cfg.restore_from_path, cfg.model, trainer=trainer)

    trainer.fit(model)
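# What the GradScaler configured above does, shown in a bare PyTorch step
# (Lightning drives this internally). Note: the hysteresis argument suggests a
# NeMo GradScaler subclass; this sketch uses torch's native scaler and assumes
# a CUDA device is available.
import torch

def _amp_step_sketch(model, opt, scaler, x):
    with torch.cuda.amp.autocast():
        loss = model(x).float().pow(2).mean()
    opt.zero_grad()
    scaler.scale(loss).backward()  # scale the loss so fp16 grads don't underflow
    scaler.step(opt)               # unscales grads; skips the step on inf/nan
    scaler.update()                # grows/shrinks the scale over time

scaler = torch.cuda.amp.GradScaler(init_scale=2 ** 32, growth_interval=1000)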
def main(cfg: DictConfig) -> None:
    pl.seed_everything(42)
    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')

    plugin = NLPDDPPlugin()
    trainer = pl.Trainer(**cfg.trainer, plugins=plugin)

    exp_manager(trainer, cfg.get("exp_manager", None))

    app_state = AppState()
    if cfg.model.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    if 'bert' in cfg.model.language_model.pretrained_model_name:
        if cfg.model.dataset.task == 'sgd':
            model_class = SGDQAModel
        else:
            model_class = IntentSlotClassificationModel
    elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
        model_class = DialogueGPTModel

    if cfg.pretrained_model or (cfg.model.nemo_path and os.path.exists(cfg.model.nemo_path)):
        if cfg.pretrained_model:
            logging.info(f'Loading pretrained model {cfg.pretrained_model}')
            model = model_class.from_pretrained(cfg.pretrained_model)
        else:
            logging.info(f'Restoring model from {cfg.model.nemo_path}')
            model = model_class.restore_from(cfg.model.nemo_path)
        if cfg.do_training:
            model.setup_training_data(train_data_config=cfg.model.train_ds)
            model.setup_multiple_validation_data(val_data_config=cfg.model.validation_ds)
    else:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = model_class(cfg.model, trainer=trainer)

    if cfg.do_training:
        trainer.fit(model)
        if cfg.model.nemo_path:
            model.save_to(cfg.model.nemo_path)
    else:
        data_dir = cfg.model.dataset.get('data_dir', None)
        dialogues_example_dir = cfg.model.dataset.get('dialogues_example_dir', None)
        if data_dir is None or dialogues_example_dir is None:
            raise ValueError('No dataset directory provided; cannot run evaluation.')
        elif not os.path.exists(data_dir):
            raise ValueError(f'{data_dir} is not found; cannot run evaluation on the test set.')
        else:
            model.update_data_dirs(data_dir=data_dir, dialogues_example_dir=dialogues_example_dir)
            model._cfg.dataset = cfg.model.dataset

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
        trainer = pl.Trainer(devices=1, accelerator=cfg.trainer.accelerator, plugins=plugin, precision=16)
        model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
        if model.prepare_test(trainer):
            trainer.test(model)
def main():
    parser = ArgumentParser()
    parser.add_argument("--use_soft_prompts", action="store_true", help="Use model's existing soft prompts")
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument(
        "--path_to_file", type=str, default="", required=False, help="Path to file with prompts (a text to complete)"
    )
    parser.add_argument(
        "--prompt", type=str, default="", required=False, help="Prompt for the model (a text to complete)"
    )
    parser.add_argument(
        "--prompt_tag", type=str, default="", required=False, help="Prompt tag string for task specific soft prompt"
    )
    parser.add_argument(
        "--tokens_to_generate", type=int, default=1, required=False, help="How many tokens to add to prompt"
    )
    parser.add_argument(
        "--stop_after_sentence",
        type=bool,
        default=True,
        required=False,
        help="True/False: whether to stop after full sentence has been generated.",
    )
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=False)
    parser.add_argument("--precision", default=16, help="PyTorch Lightning Trainer precision flag")
    parser.add_argument("--batch_size", default=1, required=False, help="Evaluation batch_size")
    parser.add_argument(
        "--compute_logprobs", type=bool, default=False, required=False, help="Method for logprobs computation"
    )

    args = parser.parse_args()

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), gpus=args.tensor_model_parallel_size, precision=args.precision)

    app_state = AppState()
    if args.tensor_model_parallel_size is not None and args.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    def pad_collate(batch):
        tokens, tokens_to_generate = batch[0]['data'], batch[0]['tokens_to_generate']
        compute_logprobs = batch[0]['compute_logprobs']
        lens = [len(token) for token in tokens]
        tokens_pad = pad_sequence(tokens, batch_first=False, padding_value=50256)
        data = []

        if 'prompt_tags' in batch[0]:
            # Keep track of soft prompt tags
            prompt_tags = batch[0]['prompt_tags']
            for token, lenn, prompt_tag in zip(tokens_pad.T, lens, prompt_tags):
                data.append((token, lenn, tokens_to_generate, compute_logprobs, prompt_tag))
        else:
            for token, lenn in zip(tokens_pad.T, lens):
                data.append((token, lenn, tokens_to_generate, compute_logprobs))

        return data

    # defining type of request
    if args.path_to_file != "":
        request = []
        prompts = open(args.path_to_file, 'r')

        for prompt in prompts.readlines():
            prompt = prompt.split('\n')[0]

            if args.use_soft_prompts and model.use_soft_prompts:
                prompt = json.loads(prompt)

            request.append(prompt)

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=int(args.batch_size))
    else:
        if args.use_soft_prompts and model.use_soft_prompts:
            request = [{'prompt_tag': args.prompt_tag, 'text': args.prompt}]
        else:
            request = [args.prompt]

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=1)

    # For GPT models that have had soft prompt tuning but you don't want to use any soft prompts
    if not args.use_soft_prompts and model.use_soft_prompts:
        model.use_soft_prompts = False

    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")
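# Toy check of the padding layout pad_collate relies on (assumption: 50256 is
# GPT-2's <|endoftext|> id, used here as the pad value).
import torch
from torch.nn.utils.rnn import pad_sequence

tokens = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
padded = pad_sequence(tokens, batch_first=False, padding_value=50256)
assert padded.shape == (3, 2)                  # (max_len, batch)
assert padded.T[1].tolist() == [4, 5, 50256]   # rows of .T are per-sample sequences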
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_file", type=str, required=True, help="Path to source .nemo file")
    parser.add_argument("--target_file", type=str, required=True, help="Path to write target .nemo file")
    parser.add_argument("--tensor_model_parallel_size", type=int, required=True, help="TP size of source model")
    parser.add_argument("--target_tensor_model_parallel_size", type=int, required=True, help="TP size of target model")
    parser.add_argument(
        "--model_class",
        type=str,
        default="nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel",
        help="NeMo model class. This script should support all NeMo megatron models that use Tensor Parallel",
    )
    parser.add_argument("--precision", default=16, help="PyTorch Lightning Trainer precision flag")

    args = parser.parse_args()

    precision = args.precision
    if args.precision in ["32", "16"]:
        precision = int(float(args.precision))

    tp_size = args.tensor_model_parallel_size
    tgt_tp_size = args.target_tensor_model_parallel_size
    cls = model_utils.import_class_by_path(args.model_class)

    trainer = Trainer(devices=1, plugins=NLPDDPPlugin(), accelerator="cpu", precision=precision)

    app_state = AppState()
    app_state.data_parallel_rank = 0
    app_state.pipeline_model_parallel_size = 1  # not supported yet in this script
    app_state.tensor_model_parallel_size = tp_size
    app_state.model_parallel_size = app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size

    if tp_size > 1:
        # Merge: gather the parameters of every TP rank, then rebuild a TP=1 model.
        partitions = []
        for i in range(tp_size):
            app_state.tensor_model_parallel_rank = i
            model = cls.restore_from(restore_path=args.model_file, trainer=trainer, map_location=torch.device("cpu"))
            params = [p for _, p in model.named_parameters()]
            partitions.append(params)

            # app_state is being updated incorrectly during restore
            app_state.data_parallel_rank = 0
            app_state.pipeline_model_parallel_size = 1  # not supported yet in this script
            app_state.tensor_model_parallel_size = tp_size
            app_state.model_parallel_size = (
                app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size
            )

        model.cfg.tensor_model_parallel_size = 1
        app_state.model_parallel_size = 1
        trainer = Trainer(devices=1, plugins=NLPDDPPlugin(), accelerator="cpu", precision=precision)
        model = cls(model.cfg, trainer).to('cpu')
        model._save_restore_connector = NLPSaveRestoreConnector()

        if tgt_tp_size > 1:
            merge_partition(model, partitions)
        else:
            merge_partition(model, partitions, args.target_file)
    else:
        # Split: load the TP=1 model and shard it to the target TP size.
        app_state.model_parallel_size = 1
        model = cls.restore_from(restore_path=args.model_file, trainer=trainer)

        if tgt_tp_size > 1:
            partitions = []
            params = [p for _, p in model.named_parameters()]
            partitions.append(params)

            model.cfg.tensor_model_parallel_size = tgt_tp_size
            app_state.model_parallel_size = tgt_tp_size
            trainer = Trainer(devices=1, plugins=NLPDDPPlugin(), accelerator="cpu", precision=precision)
            model = cls(model.cfg, trainer).to('cpu')
            model._save_restore_connector = NLPSaveRestoreConnector()
            split_partition(model, partitions, tgt_tp_size, args.target_file)

    logging.info("Successfully finished changing partitions!")
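# Tensor-level idea behind merge_partition (NeMo's helper; this is only a sketch):
# merging TP shards is concatenation along the dimension they were split on,
# i.e. the inverse of the torch.split calls in split_partition.
import torch

shards = [torch.ones(2, 4), torch.zeros(2, 4)]  # two row-parallel shards
merged = torch.cat(shards, dim=0)               # full (4, 4) weight
assert merged.shape == (4, 4)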
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument("--prompt", type=str, default="", required=True, help="Prompt for the model (a text to complete)")
    parser.add_argument("--tokens_to_generate", type=int, default=64, required=False, help="How many tokens to add to prompt")
    parser.add_argument(
        "--stop_after_sentence",
        type=bool,
        default=True,
        required=False,
        help="True/False: whether to stop after full sentence has been generated.",
    )
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=True)
    parser.add_argument("--precision", default=32, help="PyTorch Lightning Trainer precision flag")
    args = parser.parse_args()

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), gpus=args.tensor_model_parallel_size, precision=args.precision)

    app_state = AppState()
    if args.tensor_model_parallel_size is not None and args.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    request = {
        "prompt": args.prompt,
        "tokens_to_generate": args.tokens_to_generate,
        "stop_after_sentence": args.stop_after_sentence,
    }

    dataset = GPTRequestDataset(request, model.tokenizer)
    request_dl = DataLoader(dataset)
    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response[0]['completion']['text'])
    print("***************************")
    logging.info(f"Generation stopped because: {response[0]['completion']['stop reason']}")
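# Caveat on the bool flags used in these scripts: argparse's type=bool calls
# bool() on the raw string, and bool("False") is True, so "--stop_after_sentence
# False" cannot actually disable the flag. A common workaround (a hypothetical
# helper, not part of the original scripts):
import argparse

def str2bool(v: str) -> bool:
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {v!r}")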
def main(cfg: DictConfig) -> None:
    pl.seed_everything(42)
    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')

    try:
        plugin = NLPDDPPlugin()
    except (ImportError, ModuleNotFoundError):
        plugin = None

    trainer = pl.Trainer(**cfg.trainer, plugins=plugin)

    exp_manager(trainer, cfg.get("exp_manager", None))

    app_state = AppState()
    if cfg.model.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    if 'bert' in cfg.model.language_model.pretrained_model_name:
        if cfg.model.dataset.task == 'sgd':
            if cfg.model.original_nemo_checkpoint is not None:
                model_class = DialogueZeroShotIntentModel
            else:
                model_class = SGDQAModel
        elif cfg.model.dataset.task in ['zero_shot', 'design']:
            model_class = DialogueZeroShotIntentModel
        else:
            model_class = IntentSlotClassificationModel
    elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
        if cfg.model.dataset.task in ['ms_marco', 'mellon_qa']:
            model_class = DialogueGPTGenerationModel
        else:
            model_class = DialogueGPTClassificationModel
    elif (
        'bart' in cfg.model.language_model.pretrained_model_name.lower()
        or 't5' in cfg.model.language_model.pretrained_model_name.lower()
    ):
        # please use bf16/32 with t5-large and above
        # see https://github.com/huggingface/transformers/pull/10956
        model_class = DialogueS2SGenerationModel
    elif 'sentence-transformers' in cfg.model.language_model.pretrained_model_name.lower():
        model_class = DialogueNearestNeighbourModel

    if cfg.pretrained_model or (cfg.model.nemo_path and os.path.exists(cfg.model.nemo_path)):
        if cfg.pretrained_model:
            logging.info(f'Loading pretrained model {cfg.pretrained_model}')
            model = model_class.from_pretrained(cfg.pretrained_model)
        else:
            logging.info(f'Restoring model from {cfg.model.nemo_path}')
            model = model_class.restore_from(cfg.model.nemo_path)
        if cfg.do_training:
            model.setup_training_data(train_data_config=cfg.model.train_ds)
            model.setup_multiple_validation_data(val_data_config=cfg.model.validation_ds)
    else:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = model_class(cfg.model, trainer=trainer)

    if cfg.do_training:
        trainer.fit(model)
        if cfg.model.nemo_path:
            model.save_to(cfg.model.nemo_path)
    else:
        data_dir = cfg.model.dataset.get('data_dir', None)
        dialogues_example_dir = cfg.model.dataset.get('dialogues_example_dir', None)
        if data_dir is None or dialogues_example_dir is None:
            raise ValueError('No dataset directory provided; cannot run evaluation.')
        elif not os.path.exists(data_dir):
            raise ValueError(f'{data_dir} is not found; cannot run evaluation on the test set.')
        else:
            if hasattr(model, "update_data_dirs"):
                model.update_data_dirs(data_dir=data_dir, dialogues_example_dir=dialogues_example_dir)
                model._cfg.dataset = cfg.model.dataset

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
        eval_device = [cfg.trainer.devices[0]] if isinstance(cfg.trainer.devices, list) else 1
        trainer = pl.Trainer(devices=eval_device, accelerator=cfg.trainer.accelerator, precision=16)
        model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
        if model.prepare_test(trainer):
            trainer.test(model)
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument("--prompt", type=str, default="", required=True, help="Prompt for the model (a text to complete)")
    parser.add_argument("--tokens_to_generate", type=int, default=16, required=False, help="How many tokens to add to prompt")
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=False)
    parser.add_argument("--pipeline_model_parallel_size", type=int, default=1, required=False)
    parser.add_argument("--pipeline_model_parallel_split_rank", type=int, default=0, required=False)
    parser.add_argument("--precision", default="16", type=str, help="PyTorch Lightning Trainer precision flag")
    args = parser.parse_args()

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(
        plugins=NLPDDPPlugin(),
        devices=args.tensor_model_parallel_size * args.pipeline_model_parallel_size,
        accelerator='gpu',
        precision=args.precision,
    )

    app_state = AppState()
    if args.tensor_model_parallel_size > 1 or args.pipeline_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size * args.pipeline_model_parallel_size
        (
            app_state.tensor_model_parallel_rank,
            app_state.pipeline_model_parallel_rank,
            app_state.model_parallel_size,
            app_state.data_parallel_size,
            app_state.pipeline_model_parallel_split_rank,
        ) = fake_initialize_model_parallel(
            world_size=app_state.model_parallel_size,
            rank=trainer.global_rank,
            tensor_model_parallel_size_=args.tensor_model_parallel_size,
            pipeline_model_parallel_size_=args.pipeline_model_parallel_size,
            pipeline_model_parallel_split_rank_=args.pipeline_model_parallel_split_rank,
        )

    model = MegatronT5Model.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    request = {
        "prompt": args.prompt,
        "tokens_to_generate": args.tokens_to_generate,
    }

    dataset = T5RequestDataset(request, model.tokenizer)
    request_dl = DataLoader(dataset)
    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")
def main():
    parser = ArgumentParser()

    # args for loading the model, either from .nemo file or from PTL checkpoint
    parser.add_argument("--model_file", type=str, default="", required=False, help="Pass path to model's .nemo file")
    parser.add_argument(
        "--checkpoint_dir",
        type=str,
        default=None,
        required=False,
        help="If not using a .nemo file. Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints",
    )
    parser.add_argument(
        "--checkpoint_name",
        type=str,
        default=None,
        required=False,
        help="If not using a .nemo file. Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt",
    )
    parser.add_argument(
        "--hparams_file",
        type=str,
        default=None,
        required=False,
        help="If not using a .nemo file. Path to config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml",
    )
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=False, help="Needed if not using a .nemo file")
    parser.add_argument("--pipeline_model_parallel_size", type=int, default=1, required=False, help="Needed if not using a .nemo file")

    # PTL Trainer args
    parser.add_argument("--devices", default=1, type=int, help="PyTorch Lightning Trainer devices flag")
    parser.add_argument("--num_nodes", default=1, type=int, help="PyTorch Lightning Trainer num_nodes flag")
    parser.add_argument("--precision", default=16, help="PyTorch Lightning Trainer precision flag")

    # evaluation args
    parser.add_argument("--path_to_file", type=str, default="", required=False, help="Path to file with prompts (a text to complete)")
    parser.add_argument("--prompt", type=str, default="", required=False, help="Prompt for the model (a text to complete)")
    parser.add_argument("--use_soft_prompts", action="store_true", help="Use model's existing soft prompts")
    parser.add_argument("--prompt_tag", type=str, default="", required=False, help="Prompt tag string for task specific soft prompt")
    parser.add_argument("--tokens_to_generate", type=int, default=1, required=False, help="How many tokens to add to prompt")
    parser.add_argument(
        "--stop_after_sentence",
        type=bool,
        default=True,
        required=False,
        help="True/False: whether to stop after full sentence has been generated.",
    )
    parser.add_argument("--batch_size", default=1, type=int, required=False, help="Evaluation batch_size")
    parser.add_argument("--compute_logprobs", type=bool, default=False, required=False, help="Method for logprobs computation")

    args = parser.parse_args()

    assert (
        args.devices * args.num_nodes == args.tensor_model_parallel_size * args.pipeline_model_parallel_size
    ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"

    if args.model_file and args.checkpoint_dir:
        raise ValueError("Only one of model_file or checkpoint_dir should be used")

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(
        plugins=[NLPDDPPlugin()],
        devices=args.devices,
        num_nodes=args.num_nodes,
        accelerator='gpu',
        precision=args.precision,
    )

    if args.model_file:
        model = MegatronGPTModel.restore_from(restore_path=args.model_file, trainer=trainer)
    elif args.checkpoint_dir:
        app_state = AppState()
        if args.tensor_model_parallel_size > 1 or args.pipeline_model_parallel_size > 1:
            app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size
            app_state.tensor_model_parallel_size = args.tensor_model_parallel_size
            app_state.model_parallel_size = args.tensor_model_parallel_size * args.pipeline_model_parallel_size
            (
                app_state.tensor_model_parallel_rank,
                app_state.pipeline_model_parallel_rank,
                app_state.model_parallel_size,
                _,
            ) = fake_initialize_model_parallel(
                world_size=app_state.model_parallel_size,
                rank=trainer.global_rank,
                tensor_model_parallel_size_=app_state.tensor_model_parallel_size,
                pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size,
            )
        # inject model parallel rank
        checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_dir, args.checkpoint_name))
        model = MegatronGPTModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer)

    model.freeze()

    def pad_collate(batch):
        tokens, tokens_to_generate = batch[0]['data'], batch[0]['tokens_to_generate']
        compute_logprobs = batch[0]['compute_logprobs']
        lens = [len(token) for token in tokens]
        tokens_pad = pad_sequence(tokens, batch_first=False, padding_value=50256)
        data = []

        if 'prompt_tags' in batch[0]:
            # Keep track of soft prompt tags
            prompt_tags = batch[0]['prompt_tags']
            for token, lenn, prompt_tag in zip(tokens_pad.T, lens, prompt_tags):
                data.append((token, lenn, tokens_to_generate, compute_logprobs, prompt_tag))
        else:
            for token, lenn in zip(tokens_pad.T, lens):
                data.append((token, lenn, tokens_to_generate, compute_logprobs))

        return data

    # defining type of request
    if args.path_to_file != "":
        request = []
        prompts = open(args.path_to_file, 'r', encoding='utf-8')

        for prompt in prompts.readlines():
            prompt = prompt.split('\n')[0]

            if args.use_soft_prompts and model.use_soft_prompts:
                prompt = json.loads(prompt)

            request.append(prompt)

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=int(args.batch_size))
    else:
        if args.use_soft_prompts and model.use_soft_prompts:
            request = [{'prompt_tag': args.prompt_tag, 'text': args.prompt}]
        else:
            request = [args.prompt]

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=1)

    # For GPT models that have had soft prompt tuning but you don't want to use any soft prompts
    if not args.use_soft_prompts and model.use_soft_prompts:
        model.use_soft_prompts = False

    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")

    if args.prompt and not args.compute_logprobs:
        print(f'Prompt: {args.prompt}\n\nResponse: {response[0][0][0]}')