def initialize_model_parallel_for_nemo(
    world_size, global_rank, local_rank, tensor_model_parallel_size=1, seed=1234,
):
    # updating NeMo globals
    app_state = AppState()
    app_state.global_rank = global_rank
    app_state.world_size = world_size
    app_state.model_parallel_size = tensor_model_parallel_size
    app_state.model_parallel_rank = compute_model_parallel_rank(local_rank, tensor_model_parallel_size)

    # update apex.mpu globals
    set_tensor_model_parallel_world_size(tensor_model_parallel_size)
    set_tensor_model_parallel_rank(app_state.model_parallel_rank)

    # pipeline model parallelism not implemented in NeMo yet
    set_pipeline_model_parallel_rank(0)
    set_pipeline_model_parallel_world_size(1)

    _set_random_seed(seed)

    app_state._is_megatron_initialized = True
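# Hedged usage sketch (not part of the listing above): how initialize_model_parallel_for_nemo
# might be invoked once per process under a standard torch.distributed launcher, which exposes
# the WORLD_SIZE / RANK / LOCAL_RANK environment variables. The tensor_model_parallel_size
# value is an assumed example; compute_model_parallel_rank used above is assumed to map a
# local rank onto its tensor-parallel group (e.g. local_rank % tensor_model_parallel_size).
import os

if __name__ == "__main__":
    initialize_model_parallel_for_nemo(
        world_size=int(os.environ.get("WORLD_SIZE", 1)),
        global_rank=int(os.environ.get("RANK", 0)),
        local_rank=int(os.environ.get("LOCAL_RANK", 0)),
        tensor_model_parallel_size=2,  # assumed example value
        seed=1234,
    )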
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    trainer = None
    if cfg.trainer.precision == 16:
        trainer = Trainer(
            plugins=[
                NLPDDPPlugin(),
                NLPNativeMixedPrecisionPlugin(
                    init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                    growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                ),
            ],
            **cfg.trainer,
        )
    elif cfg.trainer.precision == 'bf16':
        trainer = Trainer(plugins=[NLPDDPPlugin(), NLPNativeBfloat16PrecisionPlugin()], **cfg.trainer)
    else:
        trainer = Trainer(plugins=[NLPDDPPlugin(), NLPPrecisionPlugin()], **cfg.trainer)

    app_state = AppState()
    app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
    app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(
        cfg.restore_from_path, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector(),
    )

    # Note: most NeMo models must have the data paths configured before instantiating the model.
    # MegatronGPTModel sets up the data in the PTL method .setup, which happens after DDP spawns.
    model.cfg.data.splits_string = cfg.model.data.splits_string

    trainer.test(model)
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument("--prompt", type=str, default="", required=True, help="Prompt for the model (a text to complete)")
    parser.add_argument("--tokens_to_generate", type=int, default=16, required=False, help="How many tokens to add to prompt")
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=True)
    args = parser.parse_args()

    torch.set_grad_enabled(False)

    # trainer required for restoring model parallel models
    trainer = Trainer(
        plugins=NLPDDPPlugin(),
        devices=args.tensor_model_parallel_size,
        precision=16,
        accelerator='gpu',
    )

    app_state = AppState()
    if args.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronT5Model.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    request = {
        "prompt": args.prompt,
        "tokens_to_generate": args.tokens_to_generate,
    }

    dataset = T5RequestDataset(request, model.tokenizer)
    request_dl = DataLoader(dataset)
    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    plugins = [NLPDDPPlugin(num_nodes=cfg.trainer.num_nodes)]
    if cfg.trainer.precision == 16:
        scaler = GradScaler(
            init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
            growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
        )
        plugins.append(NativeMixedPrecisionPlugin(precision=16, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer)

    exp_manager(trainer, cfg.exp_manager)

    # update resume from checkpoint found by exp_manager
    resume_from_checkpoint = trainer.checkpoint_connector.resume_from_checkpoint_fit_path
    if resume_from_checkpoint is not None:
        # inject mp_rank into resume_from_checkpoint
        if cfg.model.tensor_model_parallel_size is not None and cfg.model.tensor_model_parallel_size > 1:
            mp_rank = compute_model_parallel_rank(trainer.local_rank, cfg.model.tensor_model_parallel_size)
            resume_from_checkpoint = Path(resume_from_checkpoint)
            resume_from_checkpoint = resume_from_checkpoint.parent.parent.joinpath(f'mp_rank_{mp_rank:02d}').joinpath(
                resume_from_checkpoint.name
            )
            resume_from_checkpoint = str(resume_from_checkpoint)
        logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}')

    trainer.checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint)

    # Override timer callback to a stateless one
    for idx, callback in enumerate(trainer.callbacks):
        if isinstance(callback, Timer):
            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)

    # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
    with open_dict(cfg):
        cfg.model.precision = cfg.trainer.precision

    model = MegatronT5Model(cfg.model, trainer)

    trainer.fit(model)
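# Illustration (hypothetical checkpoint path, not from the script above): the mp_rank injection
# above rewrites the resume path two directory levels up and into the matching tensor-parallel
# subdirectory, exactly as the pathlib calls compute.
from pathlib import Path

ckpt = Path('/results/checkpoints/last.ckpt')  # hypothetical resume path
mp_rank = 1
rewritten = ckpt.parent.parent.joinpath(f'mp_rank_{mp_rank:02d}').joinpath(ckpt.name)
print(rewritten)  # /results/mp_rank_01/last.ckpt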
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    plugins = [NLPDDPPlugin(num_nodes=cfg.trainer.num_nodes)]
    if cfg.trainer.precision == 16:
        plugins.append(
            NLPNativeMixedPrecisionPlugin(
                init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
            )
        )
    elif cfg.trainer.precision == 'bf16':
        plugins.append(NLPNativeBfloat16PrecisionPlugin())
    else:
        plugins.append(NLPPrecisionPlugin())

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer)

    exp_manager(trainer, cfg.exp_manager)

    # update resume from checkpoint found by exp_manager
    resume_from_checkpoint = trainer.resume_from_checkpoint
    if resume_from_checkpoint is not None:
        mp_rank = compute_model_parallel_rank(trainer.local_rank, cfg.model.tensor_model_parallel_size)
        resume_from_checkpoint = Path(resume_from_checkpoint)
        resume_from_checkpoint = resume_from_checkpoint.parent.parent.joinpath(f'mp_rank_{mp_rank:02d}').joinpath(
            resume_from_checkpoint.name
        )
        resume_from_checkpoint = str(resume_from_checkpoint)
        logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}')

    trainer.checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint)

    # Override timer callback to a stateless one
    for idx, callback in enumerate(trainer.callbacks):
        if isinstance(callback, Timer):
            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)

    model = MegatronGPTModel(cfg.model, trainer)

    trainer.fit(model)
def main(cfg: DictConfig) -> None:
    pl.seed_everything(42)
    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')

    plugin = NLPDDPPlugin()
    trainer = pl.Trainer(**cfg.trainer, plugins=plugin)

    exp_manager(trainer, cfg.get("exp_manager", None))

    app_state = AppState()
    if cfg.model.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    if 'bert' in cfg.model.language_model.pretrained_model_name:
        if cfg.model.dataset.task == 'sgd':
            model_class = SGDQAModel
        else:
            model_class = IntentSlotClassificationModel
    elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
        model_class = DialogueGPTModel

    if cfg.pretrained_model or (cfg.model.nemo_path and os.path.exists(cfg.model.nemo_path)):
        if cfg.pretrained_model:
            logging.info(f'Loading pretrained model {cfg.pretrained_model}')
            model = model_class.from_pretrained(cfg.pretrained_model)
        else:
            logging.info(f'Restoring model from {cfg.model.nemo_path}')
            model = model_class.restore_from(cfg.model.nemo_path)
        if cfg.do_training:
            model.setup_training_data(train_data_config=cfg.model.train_ds)
            model.setup_multiple_validation_data(val_data_config=cfg.model.validation_ds)
    else:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = model_class(cfg.model, trainer=trainer)

    if cfg.do_training:
        trainer.fit(model)
        if cfg.model.nemo_path:
            model.save_to(cfg.model.nemo_path)
    else:
        data_dir = cfg.model.dataset.get('data_dir', None)
        dialogues_example_dir = cfg.model.dataset.get('dialogues_example_dir', None)
        if data_dir is None or dialogues_example_dir is None:
            raise ValueError('No dataset directory provided. Skipping evaluation.')
        elif not os.path.exists(data_dir):
            raise ValueError(f'{data_dir} is not found, skipping evaluation on the test set.')
        else:
            model.update_data_dirs(data_dir=data_dir, dialogues_example_dir=dialogues_example_dir)
            model._cfg.dataset = cfg.model.dataset

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
        trainer = pl.Trainer(devices=1, accelerator=cfg.trainer.accelerator, plugins=plugin, precision=16)
        model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
        if model.prepare_test(trainer):
            trainer.test(model)
def restore_from(
    cls,
    restore_path: str,
    override_config_path: Optional[Union[OmegaConf, str]] = None,
    map_location: Optional[torch.device] = None,
    strict: bool = True,
    return_config: bool = False,
    trainer: Trainer = None,
    save_restore_connector: SaveRestoreConnector = None,
):
    """
    Restores model instance (weights and configuration) from a .nemo file.

    Args:
        restore_path: path to .nemo file from which the model should be instantiated
        override_config_path: path to a yaml config that will override the internal config file
            or an OmegaConf / DictConfig object representing the model config.
        map_location: Optional torch.device() to map the instantiated model to a device.
            By default (None), it will select a GPU if available, falling back to CPU otherwise.
        strict: Passed to load_state_dict. Set to True by default.
        return_config: If set to true, will return just the underlying config of the restored
            model as an OmegaConf DictConfig object without instantiating the model.
        trainer: PyTorch Lightning trainer. Must be passed in order to use model parallel .nemo files.

    Example:
        ```
        model = nemo.collections.nlp.models.TokenClassificationModel.restore_from('token_classification.nemo')
        assert isinstance(model, nemo.collections.nlp.models.TokenClassificationModel)
        ```

    Returns:
        An instance of type cls or its underlying config (if return_config is set).
    """
    if save_restore_connector is None:
        save_restore_connector = SaveRestoreConnector()

    if not os.path.exists(restore_path):
        raise FileNotFoundError(f"Can't find {restore_path}")

    app_state = AppState()
    app_state.model_restore_path = os.path.abspath(os.path.expanduser(restore_path))

    # detect if we have a model parallel .nemo file
    with tempfile.TemporaryDirectory() as tmpdir:
        cwd = os.getcwd()
        os.chdir(tmpdir)
        # detect if model parallel from tarfile
        tar = tarfile.open(app_state.model_restore_path, "r:gz")
        names = tar.getnames()
        mp_ranks = []
        for name in names:
            if 'mp_rank' in name:
                mp_ranks.append(name)
        if mp_ranks:
            app_state.model_parallel_size = len(mp_ranks) // 2  # directory and file are included in getnames()

            # get checkpoint version
            checkpoint_version_member = None
            for member in tar.getmembers():
                if 'megatron_checkpoint_version.json' in member.name:
                    checkpoint_version_member = member
            tar.extract(checkpoint_version_member, tmpdir)
            with open(checkpoint_version_member.name, 'r') as f:
                checkpoint_version = json.load(f).get('checkpoint_version', None)
            logging.info(
                (
                    f'Detected model parallel .nemo file: {restore_path}. '
                    f'Assuming megatron model parallelism with '
                    f'model_parallel_size: {app_state.model_parallel_size} '
                    f'and checkpoint version: {checkpoint_version}'
                )
            )
        tar.close()
        os.chdir(cwd)

    if app_state.model_parallel_size is not None:
        if not isinstance(trainer, Trainer):
            raise ValueError("trainer must be a PyTorch Lightning Trainer to restore model parallel .nemo files.")

        if checkpoint_version is None:
            raise ValueError(
                "Restoring from megatron model parallel .nemo but could not find megatron checkpoint version."
            )
        else:
            logging.info(f"Setting megatron checkpoint version: {checkpoint_version}")
            set_checkpoint_version(checkpoint_version)

        app_state.world_size = trainer.num_gpus * trainer.num_nodes

        if trainer.local_rank is not None:
            app_state.local_rank = trainer.local_rank
        else:
            raise ValueError("trainer.local_rank is None. local_rank needed to restore model parallel models.")

        model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)
        app_state.model_parallel_rank = model_parallel_rank

        cls.update_save_restore_connector(save_restore_connector)
        restored_model = cls._save_restore_connector.restore_from(
            cls, app_state.model_restore_path, override_config_path, map_location, strict, return_config
        )
        restored_model.set_trainer(trainer)
        return restored_model
    else:
        return super().restore_from(
            app_state.model_restore_path,
            override_config_path,
            map_location,
            strict,
            return_config,
            save_restore_connector=save_restore_connector,
        )
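# Illustration (hypothetical archive contents, not a real .nemo file): the model_parallel_size
# heuristic above counts tar members whose names contain 'mp_rank'; each rank contributes a
# directory entry plus a checkpoint file, hence the division by two.
names = [
    'model_config.yaml',
    'mp_rank_00', 'mp_rank_00/model_weights.ckpt',  # hypothetical per-rank entries
    'mp_rank_01', 'mp_rank_01/model_weights.ckpt',
]
mp_ranks = [name for name in names if 'mp_rank' in name]
print(len(mp_ranks) // 2)  # 2 -> inferred tensor model parallel size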
def main():
    parser = ArgumentParser()
    parser.add_argument("--use_soft_prompts", action="store_true", help="Use model's existing soft prompts")
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument(
        "--path_to_file", type=str, default="", required=False, help="Path to file with prompts (a text to complete)"
    )
    parser.add_argument(
        "--prompt", type=str, default="", required=False, help="Prompt for the model (a text to complete)"
    )
    parser.add_argument(
        "--prompt_tag", type=str, default="", required=False, help="Prompt tag string for task specific soft prompt"
    )
    parser.add_argument(
        "--tokens_to_generate", type=int, default=1, required=False, help="How many tokens to add to prompt"
    )
    parser.add_argument(
        "--stop_after_sentence",
        type=bool,
        default=True,
        required=False,
        help="True/False: whether to stop after full sentence has been generated.",
    )
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=False)
    parser.add_argument("--precision", default=16, help="PyTorch Lightning Trainer precision flag")
    parser.add_argument("--batch_size", default=1, required=False, help="Evaluation batch_size")
    parser.add_argument(
        "--compute_logprobs", type=bool, default=False, required=False, help="Method for logprobs computation"
    )

    args = parser.parse_args()

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), gpus=args.tensor_model_parallel_size, precision=args.precision)

    app_state = AppState()
    if args.tensor_model_parallel_size is not None and args.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    def pad_collate(batch):
        tokens, tokens_to_generate = batch[0]['data'], batch[0]['tokens_to_generate']
        compute_logprobs = batch[0]['compute_logprobs']
        lens = [len(token) for token in tokens]
        tokens_pad = pad_sequence(tokens, batch_first=False, padding_value=50256)
        data = []

        if 'prompt_tags' in batch[0]:
            # Keep track of soft prompt tags
            prompt_tags = batch[0]['prompt_tags']
            for token, lenn, prompt_tag in zip(tokens_pad.T, lens, prompt_tags):
                data.append((token, lenn, tokens_to_generate, compute_logprobs, prompt_tag))
        else:
            for token, lenn in zip(tokens_pad.T, lens):
                data.append((token, lenn, tokens_to_generate, compute_logprobs))

        return data

    # defining type of request
    if args.path_to_file != "":
        request = []
        prompts = open(args.path_to_file, 'r')

        for prompt in prompts.readlines():
            prompt = prompt.split('\n')[0]

            if args.use_soft_prompts and model.use_soft_prompts:
                prompt = json.loads(prompt)

            request.append(prompt)

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=int(args.batch_size))
    else:
        if args.use_soft_prompts and model.use_soft_prompts:
            request = [{'prompt_tag': args.prompt_tag, 'text': args.prompt}]
        else:
            request = [args.prompt]

        dataset = GPTRequestDataset(request, model.tokenizer, args.tokens_to_generate, args.compute_logprobs)
        request_dl = DataLoader(dataset=pad_collate(dataset), batch_size=1)

    # For GPT models that have had soft prompt tuning but you don't want to use any soft prompts
    if not args.use_soft_prompts and model.use_soft_prompts:
        model.use_soft_prompts = False

    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response)
    print("***************************")
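# Illustration (toy tensors, not model data): pad_sequence as used in pad_collate above pads a
# ragged batch to the longest sequence; with batch_first=False the result has shape
# (max_len, batch), which is why the code iterates over tokens_pad.T. The padding value 50256
# is GPT-2's <|endoftext|> token id.
import torch
from torch.nn.utils.rnn import pad_sequence

tokens = [torch.tensor([5, 6, 7]), torch.tensor([8, 9])]
tokens_pad = pad_sequence(tokens, batch_first=False, padding_value=50256)
print(tokens_pad.shape)  # torch.Size([3, 2])
print(tokens_pad.T)      # tensor([[    5,     6,     7], [    8,     9, 50256]])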
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_file", type=str, default="", required=True, help="Pass path to model's .nemo file")
    parser.add_argument("--prompt", type=str, default="", required=True, help="Prompt for the model (a text to complete)")
    parser.add_argument("--tokens_to_generate", type=int, default=64, required=False, help="How many tokens to add to prompt")
    parser.add_argument(
        "--stop_after_sentence",
        type=bool,
        default=True,
        required=False,
        help="True/False: whether to stop after full sentence has been generated.",
    )
    parser.add_argument("--tensor_model_parallel_size", type=int, default=1, required=True)
    parser.add_argument("--precision", default=32, help="PyTorch Lightning Trainer precision flag")

    args = parser.parse_args()

    # cast precision to int if 32 or 16
    if args.precision in ["32", "16"]:
        args.precision = int(float(args.precision))

    # trainer required for restoring model parallel models
    trainer = Trainer(plugins=NLPDDPPlugin(), gpus=args.tensor_model_parallel_size, precision=args.precision)

    app_state = AppState()
    if args.tensor_model_parallel_size is not None and args.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = args.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    model = MegatronGPTModel.restore_from(restore_path=args.model_file, trainer=trainer)
    model.freeze()

    request = {
        "prompt": args.prompt,
        "tokens_to_generate": args.tokens_to_generate,
        "stop_after_sentence": args.stop_after_sentence,
    }

    dataset = GPTRequestDataset(request, model.tokenizer)
    request_dl = DataLoader(dataset)
    response = trainer.predict(model, request_dl)

    print("***************************")
    print(response[0]['completion']['text'])
    print("***************************")
    logging.info(f"Generation stopped because: {response[0]['completion']['stop reason']}")
def main(cfg: DictConfig) -> None:
    pl.seed_everything(42)
    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')

    try:
        plugin = NLPDDPPlugin()
    except (ImportError, ModuleNotFoundError):
        plugin = None

    trainer = pl.Trainer(**cfg.trainer, plugins=plugin)

    exp_manager(trainer, cfg.get("exp_manager", None))

    app_state = AppState()
    if cfg.model.tensor_model_parallel_size > 1:
        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)

    if 'bert' in cfg.model.language_model.pretrained_model_name:
        if cfg.model.dataset.task == 'sgd':
            if cfg.model.original_nemo_checkpoint is not None:
                model_class = DialogueZeroShotIntentModel
            else:
                model_class = SGDQAModel
        elif cfg.model.dataset.task in ['zero_shot', 'design']:
            model_class = DialogueZeroShotIntentModel
        else:
            model_class = IntentSlotClassificationModel
    elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
        if cfg.model.dataset.task in ['ms_marco', 'mellon_qa']:
            model_class = DialogueGPTGenerationModel
        else:
            model_class = DialogueGPTClassificationModel
    elif (
        'bart' in cfg.model.language_model.pretrained_model_name.lower()
        or 't5' in cfg.model.language_model.pretrained_model_name.lower()
    ):
        # please use bf16/32 with t5-large and above
        # see https://github.com/huggingface/transformers/pull/10956
        model_class = DialogueS2SGenerationModel
    elif 'sentence-transformers' in cfg.model.language_model.pretrained_model_name.lower():
        model_class = DialogueNearestNeighbourModel

    if cfg.pretrained_model or (cfg.model.nemo_path and os.path.exists(cfg.model.nemo_path)):
        if cfg.pretrained_model:
            logging.info(f'Loading pretrained model {cfg.pretrained_model}')
            model = model_class.from_pretrained(cfg.pretrained_model)
        else:
            logging.info(f'Restoring model from {cfg.model.nemo_path}')
            model = model_class.restore_from(cfg.model.nemo_path)
        if cfg.do_training:
            model.setup_training_data(train_data_config=cfg.model.train_ds)
            model.setup_multiple_validation_data(val_data_config=cfg.model.validation_ds)
    else:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = model_class(cfg.model, trainer=trainer)

    if cfg.do_training:
        trainer.fit(model)
        if cfg.model.nemo_path:
            model.save_to(cfg.model.nemo_path)
    else:
        data_dir = cfg.model.dataset.get('data_dir', None)
        dialogues_example_dir = cfg.model.dataset.get('dialogues_example_dir', None)
        if data_dir is None or dialogues_example_dir is None:
            raise ValueError('No dataset directory provided. Skipping evaluation.')
        elif not os.path.exists(data_dir):
            raise ValueError(f'{data_dir} is not found, skipping evaluation on the test set.')
        else:
            if hasattr(model, "update_data_dirs"):
                model.update_data_dirs(data_dir=data_dir, dialogues_example_dir=dialogues_example_dir)
                model._cfg.dataset = cfg.model.dataset

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
        eval_device = [cfg.trainer.devices[0]] if isinstance(cfg.trainer.devices, list) else 1
        trainer = pl.Trainer(devices=eval_device, accelerator=cfg.trainer.accelerator, precision=16)
        model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
        if model.prepare_test(trainer):
            trainer.test(model)