# make sure can be torch.loaded
filepath = str(tmpdir / 'result')
torch.save(result, filepath)
torch.load(filepath)

# assert metric state reset to default values
result.reset()
assert metric_a.x == metric_a._defaults['x']
assert metric_b.x == metric_b._defaults['x']
assert metric_c.x == metric_c._defaults['x']

batch_idx = None


@pytest.mark.parametrize('device', ('cpu', pytest.param('cuda', marks=RunIf(min_gpus=1))))
def test_lightning_module_logging_result_collection(tmpdir, device):
    class LoggingModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.metric = DummyMetric()

        def validation_step(self, batch, batch_idx):
            v = self.metric(batch_idx)
            self.log_dict({"v": v, "m": self.metric})
            return super().validation_step(batch, batch_idx)

        def on_save_checkpoint(self, checkpoint) -> None:
            results = self.trainer._results
            state_dict = results.state_dict()
                 on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("bar_3", batch_idx + self.rank, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
def test_logging_sync_dist_true(tmpdir, devices):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)

    use_multiple_devices = devices > 1
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        enable_model_summary=False,
        strategy="ddp_spawn" if use_multiple_devices else None,
        accelerator="auto",
        devices=devices,
def test_v_1_8_0_deprecated_device_stats_monitor_prefix_metric_keys():
    from pytorch_lightning.callbacks.device_stats_monitor import prefix_metric_keys

    with pytest.deprecated_call(match="in v1.6 and will be removed in v1.8"):
        prefix_metric_keys({"foo": 1.0}, "bar")


@pytest.mark.parametrize(
    "cls",
    [
        DDPPlugin,
        DDP2Plugin,
        DDPSpawnPlugin,
        pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
        DataParallelPlugin,
        DDPFullyShardedPlugin,
        pytest.param(IPUPlugin, marks=RunIf(ipu=True)),
        DDPShardedPlugin,
        DDPSpawnShardedPlugin,
        TPUSpawnPlugin,
    ],
)
def test_v1_8_0_deprecated_training_type_plugin_classes(cls):
    old_name = cls.__name__
    new_name = old_name.replace("Plugin", "Strategy")
    with pytest.deprecated_call(
        match=f"{old_name}` is deprecated in v1.6 and will be removed in v1.8. Use .*{new_name}` instead."
    ):
        cls()
"SLURM_PROCID": "0", "SLURM_LOCALID": "0", }, ) @mock.patch("torch.cuda.device_count", return_value=2) @pytest.mark.parametrize("strategy,gpus", [("ddp", 2), ("ddp2", 2), ("ddp_spawn", 2)]) @pytest.mark.parametrize( "amp,custom_plugin,plugin_cls", [ ("native", False, NativeMixedPrecisionPlugin), ("native", True, MyNativeAMP), pytest.param("apex", False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)), pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)), ], ) def test_amp_apex_ddp(mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls): plugin = None if custom_plugin: plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls() trainer = Trainer( fast_dev_run=True, precision=16, amp_backend=amp, gpus=gpus, strategy=strategy, plugins=plugin,
"GROUP_RANK": "1", "RANK": "3", "WORLD_SIZE": "4", "LOCAL_WORLD_SIZE": "2", } environment = TorchElasticEnvironment() yield environment, variables, expected @pytest.mark.parametrize( "plugin_cls", [ DDPPlugin, DDPShardedPlugin, DDP2Plugin, pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)), pytest.param(RPCSequentialPlugin, marks=RunIf(fairscale_pipe=True)), ], ) def test_ranks_available_manual_plugin_selection(plugin_cls): """ Test that the rank information is readily available after Trainer initialization. """ num_nodes = 2 for cluster, variables, expected in environment_combinations(): if plugin_cls == DDP2Plugin: expected.update(global_rank=expected["node_rank"], world_size=num_nodes) with mock.patch.dict(os.environ, variables): plugin = plugin_cls( parallel_devices=[torch.device("cuda", 1), torch.device("cuda", 2)], cluster_environment=cluster,
                      DDPShardedPlugin)

    trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin, DDPShardedPlugin)


@pytest.mark.parametrize(
    ["accelerator", "plugin"],
    [
        ("ddp", DDPPlugin),
        ("ddp_spawn", DDPSpawnPlugin),
        ("ddp_sharded", DDPShardedPlugin),
        ("ddp_sharded_spawn", DDPSpawnShardedPlugin),
        pytest.param("deepspeed", DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
    ],
)
@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("gpus", [1, 2])
def test_accelerator_choice_multi_node_gpu(
    mock_is_available, mock_device_count, tmpdir, accelerator: str, plugin: ParallelPlugin, gpus: int
):
    trainer = Trainer(accelerator=accelerator, default_root_dir=tmpdir, num_nodes=2, gpus=gpus)
    assert isinstance(trainer.training_type_plugin, plugin)
        if not self.early_stop_on_train:
            return
        self._epoch_end()

    def validation_epoch_end(self, outputs):
        if self.early_stop_on_train:
            return
        self._epoch_end()

    def on_train_end(self) -> None:
        assert self.trainer.current_epoch - 1 == self.expected_end_epoch, "Early Stopping Failed"


_ES_CHECK = dict(check_on_train_epoch_end=True)
_ES_CHECK_P3 = dict(patience=3, check_on_train_epoch_end=True)
_SPAWN_MARK = dict(marks=RunIf(skip_windows=True, skip_49370=True))


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, check_on_train_epoch_end, strategy, devices",
    [
        ([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, None, 1),
        ([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, None, 1),
        pytest.param([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, "ddp_spawn", 2, **_SPAWN_MARK),
        pytest.param([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, "ddp_spawn", 2, **_SPAWN_MARK),
        ([EarlyStopping("abc", **_ES_CHECK), EarlyStopping("cba", **_ES_CHECK_P3)], 3, True, None, 1),
        ([EarlyStopping("cba", **_ES_CHECK_P3), EarlyStopping("abc", **_ES_CHECK)], 3, True, None, 1),
        pytest.param(
            [EarlyStopping("abc", **_ES_CHECK), EarlyStopping("cba", **_ES_CHECK_P3)],
            3,
            True,
    def on_train_end(self) -> None:
        assert self.trainer.current_epoch == self.expected_end_epoch, 'Early Stopping Failed'


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, accelerator, num_processes",
    [
        ([EarlyStopping(monitor='abc'), EarlyStopping(monitor='cba', patience=3)], 3, None, 1),
        ([EarlyStopping(monitor='cba', patience=3), EarlyStopping(monitor='abc')], 3, None, 1),
        pytest.param([EarlyStopping(monitor='abc'), EarlyStopping(monitor='cba', patience=3)], 3, 'ddp_cpu', 2,
                     marks=RunIf(skip_windows=True)),
        pytest.param([EarlyStopping(monitor='cba', patience=3), EarlyStopping(monitor='abc')], 3, 'ddp_cpu', 2,
                     marks=RunIf(skip_windows=True)),
    ],
)
def test_multiple_early_stopping_callbacks(
    tmpdir, callbacks: List[EarlyStopping], expected_stop_epoch: int, accelerator: Optional[str], num_processes: int
):
    """Ensure when using multiple early stopping callbacks we stop if any signals we should stop."""

    model = EarlyStoppingModel(expected_stop_epoch)
    os.environ,
    {
        "CUDA_VISIBLE_DEVICES": "0,1",
        "SLURM_NTASKS": "2",
        "SLURM_JOB_NAME": "SOME_NAME",
        "SLURM_NODEID": "0",
        "LOCAL_RANK": "0",
        "SLURM_LOCALID": "0",
    })
@mock.patch('torch.cuda.device_count', return_value=2)
@pytest.mark.parametrize('ddp_backend,gpus', [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)])
@pytest.mark.parametrize('amp,custom_plugin,plugin_cls', [
    pytest.param('native', False, NativeMixedPrecisionPlugin, marks=RunIf(amp_native=True)),
    pytest.param('native', True, MyNativeAMP, marks=RunIf(amp_native=True)),
    pytest.param('apex', False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)),
    pytest.param('apex', True, MyApexPlugin, marks=RunIf(amp_apex=True)),
])
def test_amp_apex_ddp(
    mocked_device_count, ddp_backend: str, gpus: int, amp: str, custom_plugin: bool, plugin_cls: MixedPrecisionPlugin
):
    trainer = Trainer(
        fast_dev_run=True,
        precision=16,
        amp_backend=amp,
        gpus=gpus,
        accelerator=ddp_backend,
    model = BoringModel()

    trainer = Trainer(strategy="ddp_sharded_spawn", accelerator="cpu", devices=2, fast_dev_run=True)

    trainer.fit(model, ckpt_path=checkpoint_path)


@RunIf(skip_windows=True, standalone=True, fairscale=True)
@pytest.mark.parametrize(
    "trainer_kwargs",
    (dict(accelerator="cpu", devices=2), pytest.param(dict(accelerator="gpu", devices=2), marks=RunIf(min_gpus=2))),
)
def test_ddp_sharded_strategy_test_multigpu(tmpdir, trainer_kwargs):
    """Test to ensure we can use validate and test without fit."""
    model = BoringModel()
    trainer = Trainer(
        strategy="ddp_sharded_spawn",
        fast_dev_run=True,
        enable_progress_bar=False,
        enable_model_summary=False,
        **trainer_kwargs,
    )

    trainer.validate(model)
    trainer.test(model)
# make sure can be torch.loaded
filepath = str(tmpdir / "result")
torch.save(result, filepath)
torch.load(filepath)

# assert metric state reset to default values
result.reset()
assert metric_a.x == metric_a._defaults["x"]
assert metric_b.x == metric_b._defaults["x"]
assert metric_c.x == metric_c._defaults["x"]

batch_idx = None


@pytest.mark.parametrize("device", ("cpu", pytest.param("cuda", marks=RunIf(min_gpus=1))))
def test_lightning_module_logging_result_collection(tmpdir, device):
    class LoggingModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.metric = DummyMetric()

        def validation_step(self, batch, batch_idx):
            v = self.metric(batch_idx)
            self.log_dict({"v": v, "m": self.metric})
            return super().validation_step(batch, batch_idx)

        def on_save_checkpoint(self, checkpoint) -> None:
            results = self.trainer._results
            # simplify logic
                 on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("bar_3", batch_idx + self.rank, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize("gpus", [
    None,
    pytest.param(1, marks=RunIf(min_gpus=1)),
    pytest.param(2, marks=RunIf(min_gpus=2)),
])
def test_logging_sync_dist_true(tmpdir, gpus):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)

    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        weights_summary=None,
        gpus=gpus,
    )
    trainer.fit(model)
    EmptyLite.seed_everything(3)

    lite = EmptyLite()
    lite_dataloader = lite.setup_dataloaders(DataLoader(Mock()))

    assert lite_dataloader.worker_init_fn.func is pl_worker_init_function
    assert os.environ == {"PL_GLOBAL_SEED": "3", "PL_SEED_WORKERS": "1"}


@pytest.mark.parametrize(
    "strategy",
    [
        _StrategyType.DP,
        _StrategyType.DDP,
        _StrategyType.DDP_SPAWN,
        pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)),
        pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)),
        pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)),
    ],
)
def test_setup_dataloaders_replace_custom_sampler(strategy):
    """Test that asking to replace a custom sampler results in an error when a distributed sampler would be
    needed."""
    custom_sampler = Mock(spec=Sampler)
    dataloader = DataLoader(Mock(), sampler=custom_sampler)

    # explicitly asking to replace when a custom sampler is already configured raises an exception
    lite = EmptyLite(accelerator="cpu", strategy=strategy, devices=2)
    if lite._accelerator_connector.is_distributed:
        with pytest.raises(MisconfigurationException, match="You seem to have configured a sampler in your DataLoader"):
            lite.setup_dataloaders(dataloader, replace_sampler=True)
        optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1)
        return optimizer, optimizer_2


@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"},
            marks=RunIf(min_gpus=1),
        ),
        pytest.param(
            {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "apex", "amp_level": "O2"},
            marks=RunIf(min_gpus=1, amp_apex=True),
        ),
    ],
)
def test_multiple_optimizers_manual_no_return(tmpdir, kwargs):
    apex_optimizer_patches = []
    apex_optimizer_steps = []
@pytest.mark.parametrize("test_option,do_train,gpus", [ pytest.param(0, True, 0, id='full_loop'), pytest.param(0, False, 0, id='test_only'), pytest.param(1, False, 0, id='test_only_mismatching_tensor', marks=pytest.mark.xfail(raises=ValueError, match="Mism.*")), pytest.param(2, False, 0, id='mix_of_tensor_dims'), pytest.param(3, False, 0, id='string_list_predictions'), pytest.param(4, False, 0, id='int_list_predictions'), pytest.param(5, False, 0, id='nested_list_predictions'), pytest.param(6, False, 0, id='dict_list_predictions'), pytest.param(7, True, 0, id='write_dict_predictions'), pytest.param( 0, True, 1, id='full_loop_single_gpu', marks=RunIf(min_gpus=1)) ]) def test_result_obj_predictions(tmpdir, test_option: int, do_train: bool, gpus: int): class CustomBoringModel(BoringModel): def test_step(self, batch, batch_idx, optimizer_idx=None): output = self(batch) test_loss = self.loss(batch, output) self.log('test_loss', test_loss) batch_size = batch.size(0) lst_of_str = [ random.choice(['dog', 'cat']) for i in range(batch_size) ] lst_of_int = [random.randint(500, 1000) for i in range(batch_size)] lst_of_lst = [[x] for x in lst_of_int]
"SLURM_NTASKS": "2", "SLURM_JOB_NAME": "SOME_NAME", "SLURM_NODEID": "0", "LOCAL_RANK": "0", "SLURM_PROCID": "0", "SLURM_LOCALID": "0", }, ) @mock.patch("torch.cuda.device_count", return_value=2) @pytest.mark.parametrize("strategy,gpus", [("ddp", 2), ("ddp2", 2), ("ddp_spawn", 2)]) @pytest.mark.parametrize( "amp,custom_plugin,plugin_cls", [ ("native", False, NativeMixedPrecisionPlugin), ("native", True, MyNativeAMP), pytest.param("apex", False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)), pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)), ], ) def test_amp_apex_ddp(mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls): plugin = None if custom_plugin: plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls() trainer = Trainer( fast_dev_run=True, precision=16, amp_backend=amp, gpus=gpus, strategy=strategy, plugins=plugin, )
    checkpoint_plugin.reset_mock()
    ck = ModelCheckpoint(dirpath=tmpdir, save_last=True)
    model = BoringModel()
    device = torch.device("cpu")
    trainer = Trainer(
        default_root_dir=tmpdir,
        strategy=SingleDevicePlugin(device),
        plugins=[checkpoint_plugin],
        callbacks=ck,
        max_epochs=2,
    )
    trainer.fit(model)

    assert checkpoint_plugin.save_checkpoint.call_count == 5
    assert checkpoint_plugin.remove_checkpoint.call_count == 1

    trainer.test(model, ckpt_path=ck.last_model_path)
    checkpoint_plugin.load_checkpoint.assert_called_once()
    checkpoint_plugin.load_checkpoint.assert_called_with(tmpdir / "last.ckpt")


@pytest.mark.parametrize("plugin_cls", [pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)), TPUSpawnPlugin])
def test_no_checkpoint_io_plugin_support(plugin_cls):
    with pytest.raises(MisconfigurationException, match="currently does not support custom checkpoint plugins"):
        plugin_cls().checkpoint_io = CustomCheckpointIO()
                 on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("bar_3", batch_idx + self.rank, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(skip_windows=True, skip_49370=True))])
def test_logging_sync_dist_true(tmpdir, devices):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)

    use_multiple_devices = devices > 1
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        enable_model_summary=False,
        strategy="ddp_spawn" if use_multiple_devices else None,
        accelerator="auto",
        devices=devices,
        trainer = Trainer(accelerator=accelerator, plugins=plugin, num_processes=2)
    assert isinstance(trainer.strategy, DDPShardedStrategy)

    with pytest.deprecated_call(match="Passing .* `strategy` to the `plugins`"):
        trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.strategy, DDPShardedStrategy)


@pytest.mark.parametrize(
    ["accelerator", "plugin"],
    [
        ("ddp", DDPStrategy),
        ("ddp_spawn", DDPSpawnStrategy),
        ("ddp_sharded", DDPShardedStrategy),
        ("ddp_sharded_spawn", DDPSpawnShardedStrategy),
        pytest.param("deepspeed", DeepSpeedStrategy, marks=RunIf(deepspeed=True)),
    ],
)
@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("gpus", [1, 2])
def test_accelerator_choice_multi_node_gpu(
    mock_is_available, mock_device_count, tmpdir, accelerator: str, plugin: ParallelStrategy, gpus: int
):
    with pytest.deprecated_call(match=r"accelerator=.*\)` has been deprecated"):
        trainer = Trainer(accelerator=accelerator, default_root_dir=tmpdir, num_nodes=2, gpus=gpus)
    assert isinstance(trainer.strategy, plugin)


@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't require GPU")
def test_accelerator_cpu():
        if not self.early_stop_on_train:
            return
        self._epoch_end()

    def validation_epoch_end(self, outputs):
        if self.early_stop_on_train:
            return
        self._epoch_end()

    def on_train_end(self) -> None:
        assert self.trainer.current_epoch == self.expected_end_epoch, "Early Stopping Failed"


_ES_CHECK = dict(check_on_train_epoch_end=True)
_ES_CHECK_P3 = dict(patience=3, check_on_train_epoch_end=True)
_NO_WIN = dict(marks=RunIf(skip_windows=True))


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, check_on_train_epoch_end, accelerator, num_processes",
    [
        ([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, None, 1),
        ([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, None, 1),
        pytest.param([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, "ddp_cpu", 2, **_NO_WIN),
        pytest.param([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, "ddp_cpu", 2, **_NO_WIN),
import pytest
import torch

import tests.helpers.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import SingleDevicePlugin
from tests.accelerators.test_dp import CustomClassificationModelDP
from tests.helpers.boring_model import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.runif import RunIf


@pytest.mark.parametrize(
    "trainer_kwargs",
    (
        pytest.param(dict(gpus=1), marks=RunIf(min_gpus=1)),
        pytest.param(dict(accelerator="dp", gpus=2), marks=RunIf(min_gpus=2)),
        pytest.param(dict(accelerator="ddp_spawn", gpus=2), marks=RunIf(min_gpus=2)),
    ),
)
def test_evaluate(tmpdir, trainer_kwargs):
    tutils.set_random_master_port()

    dm = ClassifDataModule()
    model = CustomClassificationModelDP()
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        limit_train_batches=10,
        limit_val_batches=10,
        deterministic=True,
    Ensure that when a plugin and an accelerator are passed in, the plugin takes precedence.
    """
    trainer = Trainer(accelerator=accelerator, plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin, DDPShardedPlugin)

    trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin, DDPShardedPlugin)


@pytest.mark.parametrize(["accelerator", "plugin"], [
    ('ddp', DDPPlugin),
    ('ddp_spawn', DDPSpawnPlugin),
    ('ddp_sharded', DDPShardedPlugin),
    ('ddp_sharded_spawn', DDPSpawnShardedPlugin),
    pytest.param('deepspeed', DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
])
@mock.patch('torch.cuda.is_available', return_value=True)
@mock.patch('torch.cuda.device_count', return_value=2)
def test_accelerator_choice_multi_node_gpu(mock_is_available, mock_device_count, accelerator, plugin, tmpdir):
    trainer = Trainer(
        accelerator=accelerator,
        default_root_dir=tmpdir,
        num_nodes=2,
        gpus=2,
    )
    assert isinstance(trainer.training_type_plugin, plugin)
    if precision == 32:
        yield
        return
    if accelerator == "gpu":
        with torch.cuda.amp.autocast():
            yield
    elif accelerator == "cpu":
        with torch.cpu.amp.autocast():
            yield


@pytest.mark.parametrize(
    "precision, strategy, devices, accelerator",
    [
        pytest.param(32, None, 1, "cpu"),
        pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
        pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
        pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_torch="1.10", min_gpus=1)),
    ],
)
def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):
    LightningLite.seed_everything(42)
    train_dataloader = DataLoader(RandomDataset(32, 8))
    model = BoringModel()
    num_epochs = 1
    state_dict = deepcopy(model.state_dict())

    lite = LiteRunner(precision=precision, strategy=strategy, devices=devices, accelerator=accelerator)
    lite.run(model, train_dataloader, num_epochs=num_epochs)
    lite_state_dict = model.state_dict()
    trainer = Trainer(
        fast_dev_run=True,
        default_root_dir=tmpdir,
        plugins='deepspeed',
    )

    plugin = trainer.accelerator.training_type_plugin
    assert isinstance(plugin, DeepSpeedPlugin)
    assert plugin.parallel_devices == [torch.device('cpu')]
    assert plugin.config == deepspeed_config


@RunIf(amp_native=True, deepspeed=True)
@pytest.mark.parametrize("amp_backend", [
    pytest.param("native", marks=RunIf(amp_native=True)),
    pytest.param("apex", marks=RunIf(amp_apex=True)),
])
def test_deepspeed_precision_choice(amp_backend, tmpdir):
    """
    Test to ensure precision plugin is also correctly chosen.
    DeepSpeed handles precision via custom DeepSpeedPrecisionPlugin.
    """
    trainer = Trainer(
        fast_dev_run=True,
        default_root_dir=tmpdir,
        plugins='deepspeed',
        amp_backend=amp_backend,
        precision=16,
    )
    assert cli.model.num_classes == 5


class EarlyExitTestModel(BoringModel):
    def on_fit_start(self):
        raise Exception("Error on fit start")


@pytest.mark.parametrize("logger", (False, True))
@pytest.mark.parametrize(
    "trainer_kwargs",
    (
        dict(accelerator="ddp_cpu"),
        dict(accelerator="ddp_cpu", plugins="ddp_find_unused_parameters_false"),
        pytest.param({"tpu_cores": 1}, marks=RunIf(tpu=True)),
    ),
)
def test_cli_ddp_spawn_save_config_callback(tmpdir, logger, trainer_kwargs):
    with mock.patch("sys.argv", ["any.py", "fit"]), pytest.raises(Exception, match=r"Error on fit start"):
        LightningCLI(
            EarlyExitTestModel,
            trainer_defaults={
                "default_root_dir": str(tmpdir),
                "logger": logger,
                "max_steps": 1,
                "max_epochs": 1,
                **trainer_kwargs,
            },
        )
        strategy=strategy,
        precision=precision,
    )

    model = AMPTestModel()
    trainer.fit(model)
    trainer.test(model)
    trainer.predict(model, DataLoader(RandomDataset(32, 64)))

    assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(min_gpus=2, min_torch="1.10")
@pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"])
@pytest.mark.parametrize("precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))])
@pytest.mark.parametrize("devices", [1, 2])
def test_amp_gpus(tmpdir, strategy, precision, devices):
    """Make sure combinations of AMP and strategies work if supported."""
    tutils.reset_seed()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        accelerator="gpu",
        devices=devices,
        strategy=strategy,
        precision=precision,
    )

    model = AMPTestModel()
else:

    class DictConfSubClassBoringModel:
        ...


@pytest.mark.parametrize(
    "cls",
    [
        CustomBoringModel,
        SubClassBoringModel,
        NonSavingSubClassBoringModel,
        SubSubClassBoringModel,
        AggSubClassBoringModel,
        UnconventionalArgsBoringModel,
        pytest.param(DictConfSubClassBoringModel, marks=RunIf(omegaconf=True)),
    ],
)
def test_collect_init_arguments(tmpdir, cls):
    """Test that the model automatically saves the arguments passed into the constructor."""
    extra_args = {}
    if cls is AggSubClassBoringModel:
        extra_args.update(my_loss=torch.nn.CosineEmbeddingLoss())
    elif cls is DictConfSubClassBoringModel:
        extra_args.update(dict_conf=OmegaConf.create(dict(my_param="anything")))

    model = cls(**extra_args)
    assert model.hparams.batch_size == 64
    model = cls(batch_size=179, **extra_args)
    assert model.hparams.batch_size == 179
    trainer.predict(model, dataloaders=dataloader, return_predictions=False)
    assert cb.write_on_batch_end.call_count == 4
    assert cb.write_on_epoch_end.call_count == 0

    DummyPredictionWriter.write_on_batch_end.reset_mock()
    DummyPredictionWriter.write_on_epoch_end.reset_mock()

    cb = DummyPredictionWriter("epoch")
    trainer = Trainer(limit_predict_batches=4, callbacks=cb)
    trainer.predict(model, dataloaders=dataloader, return_predictions=False)
    assert cb.write_on_batch_end.call_count == 0
    assert cb.write_on_epoch_end.call_count == 1


@pytest.mark.parametrize("num_workers", [0, pytest.param(2, marks=RunIf(slow=True))])
def test_prediction_writer_batch_indices(tmpdir, num_workers):
    DummyPredictionWriter.write_on_batch_end = Mock()
    DummyPredictionWriter.write_on_epoch_end = Mock()

    dataloader = DataLoader(RandomDataset(32, 64), batch_size=4, num_workers=num_workers)
    model = BoringModel()
    writer = DummyPredictionWriter("batch_and_epoch")
    trainer = Trainer(limit_predict_batches=4, callbacks=writer)
    trainer.predict(model, dataloaders=dataloader)

    writer.write_on_batch_end.assert_has_calls([
        call(trainer, model, ANY, [0, 1, 2, 3], ANY, 0, 0),
        call(trainer, model, ANY, [4, 5, 6, 7], ANY, 1, 0),
    model = BoringModel()
    trainer = Trainer(
        accelerator='ddp_sharded_spawn',
        num_processes=2,
        fast_dev_run=True,
        resume_from_checkpoint=checkpoint_path,
    )

    trainer.fit(model)


@RunIf(skip_windows=True, special=True, fairscale=True)
@pytest.mark.parametrize("trainer_kwargs", (
    dict(num_processes=2),
    pytest.param(dict(gpus=2), marks=RunIf(min_gpus=2)),
))
def test_ddp_sharded_plugin_test_multigpu(tmpdir, trainer_kwargs):
    """
    Test to ensure we can use validate and test without fit
    """
    model = BoringModel()
    trainer = Trainer(
        accelerator='ddp_sharded_spawn',
        fast_dev_run=True,
        **trainer_kwargs,
    )

    trainer.validate(model)
    trainer.test(model)
dict(name="predict_step", args=(ANY, i)), # TODO: `predict_step_end` dict(name="Callback.on_predict_batch_end", args=(trainer, model, ANY, ANY, i, 0)), dict(name="on_predict_batch_end", args=(ANY, ANY, i, 0)), ]) return out @pytest.mark.parametrize( "kwargs", [ {}, # these precision plugins modify the optimization flow, so testing them explicitly pytest.param(dict(gpus=1, precision=16, plugins="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1)), pytest.param(dict(gpus=1, precision=16, amp_backend="native"), marks=RunIf(amp_native=True, min_gpus=1)), pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), ], ) @pytest.mark.parametrize("automatic_optimization", (True, False)) def test_trainer_model_hook_system_fit(tmpdir, kwargs, automatic_optimization): called = [] class TestModel(HookedModel): def __init__(self, *args): super().__init__(*args) self.automatic_optimization = automatic_optimization