def test_s3_no_creds(secrets: Dict[str, str]) -> None:
    """Run mnist_pytorch with the storage config from ``exp.s3_checkpoint_config_no_creds()``.

    The AWS key pair is read from ``secrets`` and appended to the experiment's
    ``environment.environment_variables`` list. The test is currently skipped
    unconditionally by the ``pytest.skip`` call on the first line.
    """
    pytest.skip("Temporarily skipping this until we find a more secure way of testing this.")

    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg["checkpoint_storage"] = exp.s3_checkpoint_config_no_creds()

    # Make sure the nested environment-variable list exists before appending.
    environment = cfg.setdefault("environment", {})
    environment.setdefault("environment_variables", [])
    environment["environment_variables"] += [
        f"AWS_ACCESS_KEY_ID={secrets['INTEGRATIONS_S3_ACCESS_KEY']}",
        f"AWS_SECRET_ACCESS_KEY={secrets['INTEGRATIONS_S3_SECRET_KEY']}",
    ]

    model_dir = conf.tutorials_path("mnist_pytorch")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_log_null_bytes() -> None:
    """Run the no_op fixture with ``write_null`` set and check the trial has logs.

    The experiment is limited to one batch with no restarts; the test asserts
    that exactly one trial exists and that its log listing is non-empty.
    """
    cfg = conf.load_config(conf.fixtures_path("no_op/single.yaml"))
    cfg["hyperparameters"]["write_null"] = True
    cfg["max_restarts"] = 0
    cfg["searcher"]["max_length"] = {"batches": 1}

    model_dir = conf.fixtures_path("no_op")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1

    trial_logs = exp.trial_logs(trial_list[0]["id"])
    assert len(trial_logs) > 0
def test_tf_keras_mnist_parallel() -> None:
    """Run the fashion_mnist_tf_keras tutorial on 8 slots and expect a single trial.

    Native parallel is disabled and the run length is set to 200 batches.
    """
    yaml_path = conf.tutorials_path("fashion_mnist_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_length(cfg, {"batches": 200})

    model_dir = conf.tutorials_path("fashion_mnist_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_faster_rcnn() -> None:
    """Run the FasterRCNN_tp experimental example for 2 steps on a single slot.

    The 16-GPU config file is loaded and then overridden to one slot per trial;
    the run is given ``max_wait_secs=4800``.
    """
    yaml_path = conf.experimental_path("trial/FasterRCNN_tp/16-gpus.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_slots_per_trial(cfg, 1)

    model_dir = conf.experimental_path("trial/FasterRCNN_tp")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1, max_wait_secs=4800)
def test_pytorch_const_with_amp(
    api_style: str, collect_trial_profiles: Callable[[int], None]
) -> None:
    """Run the pytorch_amp fixture for the given ``api_style`` with profiling enabled.

    Args:
        api_style: Prefix of the ``<api_style>_amp.yaml`` config inside the
            ``pytorch_amp`` fixture directory.
        collect_trial_profiles: Callback invoked with the id of the first trial
            once the experiment has run.
    """
    yaml_path = conf.fixtures_path("pytorch_amp/" + api_style + "_amp.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_profiling_enabled(cfg)

    model_dir = conf.fixtures_path("pytorch_amp")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    first_trial = exp.experiment_trials(exp_id)[0]
    collect_trial_profiles(first_trial.trial.id)
def test_on_trial_close_callback() -> None:
    """Run estimator_no_op on 8 slots and look for the on_trial_close log line.

    The test asserts that the first trial's logs contain
    "rank 0 has completed on_trial_close".
    """
    cfg = conf.load_config(conf.fixtures_path("estimator_no_op/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 3})

    model_dir = conf.fixtures_path("estimator_no_op")
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    first_trial_id = exp.experiment_trials(experiment_id)[0].trial.id
    assert exp.check_if_string_present_in_trial_logs(
        first_trial_id, "rank 0 has completed on_trial_close"
    )
def test_mnist_estimator_load() -> None:
    """Train mnist_estimator with the TF1 image and load its top checkpoint.

    The loaded object is asserted to be an ``AutoTrackable`` instance.
    """
    cfg = conf.load_config(conf.fixtures_path("mnist_estimator/single.yaml"))
    cfg = conf.set_tf1_image(cfg)

    model_dir = conf.official_examples_path("trial/mnist_estimator")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)

    # Fetch the first trial through the Determined client and load its best checkpoint.
    client = Determined(conf.make_master_url())
    trial_ref = client.get_trial(trial_list[0]["id"])
    loaded = trial_ref.top_checkpoint().load()
    assert isinstance(loaded, AutoTrackable)
def test_mnist_estimator_const_parallel(tf2: bool) -> None:
    """Run mnist_estimator on 8 slots and verify an initial validation was performed.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
    """
    yaml_path = conf.fixtures_path("mnist_estimator/single-multi-slot.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)
    cfg = conf.set_perform_initial_validation(cfg, True)

    model_dir = conf.cv_examples_path("mnist_estimator")
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    exp.assert_performed_initial_validation(experiment_id)
def test_tf_keras_single_gpu(tf2: bool) -> None:
    """Run cifar10_cnn_tf_keras for 2 steps on one slot and expect a single trial.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
    """
    yaml_path = conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 1)
    cfg = conf.set_max_steps(cfg, 2)
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    model_dir = conf.official_examples_path("cifar10_cnn_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_tf_keras_mnist_parallel() -> None:
    """Run the fashion_mnist_tf_keras official example for 2 steps on 8 slots.

    Native parallel is disabled; the test asserts exactly one trial exists.
    """
    yaml_path = conf.official_examples_path("fashion_mnist_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)

    model_dir = conf.official_examples_path("fashion_mnist_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_pytorch_parallel() -> None:
    """Run mnist_pytorch on 8 slots and verify an initial validation was performed.

    Native parallel is turned off, tensor auto-tuning and initial validation
    are turned on, and the run is launched with ``has_zeroth_step=True``.
    """
    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_tensor_auto_tuning(cfg, True)
    cfg = conf.set_perform_initial_validation(cfg, True)

    model_dir = conf.tutorials_path("mnist_pytorch")
    experiment_id = exp.run_basic_test_with_temp_config(
        cfg, model_dir, 1, has_zeroth_step=True
    )
    exp.assert_performed_initial_validation(experiment_id)
def test_estimator_when_detecting_gpus() -> None:
    """Run the estimator_gpu_detection fixture on 8 slots for 200 batches.

    The run is launched with ``has_zeroth_step=False``.
    """
    yaml_path = conf.fixtures_path("estimator_gpu_detection/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})

    model_dir = conf.fixtures_path("estimator_gpu_detection/")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1, has_zeroth_step=False)
def test_tensorpack_native_parallel() -> None:
    """Run the mnist_tp example for 2 steps on 8 slots with native parallel enabled.

    The test asserts that exactly one trial exists afterwards.
    """
    cfg = conf.load_config(conf.official_examples_path("mnist_tp/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_steps(cfg, 2)

    model_dir = conf.official_examples_path("mnist_tp")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """Run mnist_pytorch on 8 slots with the given aggregation frequency.

    Args:
        aggregation_frequency: Value passed to ``conf.set_aggregation_frequency``.
        use_amp: When true, the AMP level is set to "O1". The combination of
            AMP and an aggregation frequency above 1 is skipped.
    """
    amp_with_aggregation = use_amp and aggregation_frequency > 1
    if amp_with_aggregation:
        pytest.skip("Mixed precision is not support with aggregation frequency > 1.")

    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if use_amp:
        cfg = conf.set_amp_level(cfg, "O1")

    model_dir = conf.tutorials_path("mnist_pytorch")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_pytorch_cifar10_parallel() -> None:
    """Run cifar10_cnn_pytorch for 2 steps on 8 slots and load the latest checkpoint.

    The loaded object is asserted to be a ``torch.nn.Module``.
    """
    yaml_path = conf.official_examples_path("trial/cifar10_cnn_pytorch/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_slots_per_trial(cfg, 8)

    model_dir = conf.official_examples_path("trial/cifar10_cnn_pytorch")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)

    # Resolve the first trial through the Determined client and load its newest checkpoint.
    client = Determined(conf.make_master_url())
    trial_ref = client.get_trial(trial_list[0]["id"])
    loaded_model = trial_ref.select_checkpoint(latest=True).load()
    assert isinstance(loaded_model, torch.nn.Module)
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """Run cifar10_tf_keras on 8 slots for 200 batches and expect a single trial.

    Args:
        aggregation_frequency: Value passed to ``conf.set_aggregation_frequency``.
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
    """
    cfg = conf.load_config(conf.cv_examples_path("cifar10_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    model_dir = conf.cv_examples_path("cifar10_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def run_mnist_estimator_data_layer_test(tf2: bool, storage_type: str) -> None:
    """Run data_layer_mnist_estimator for 200 batches with the chosen data layer.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
        storage_type: "lfs" applies the shared-fs data layer setter; any other
            value applies the S3 data layer setter.
    """
    yaml_path = conf.data_layer_examples_path("data_layer_mnist_estimator/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_length(cfg, {"batches": 200})

    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    use_shared_fs = storage_type == "lfs"
    if use_shared_fs:
        cfg = conf.set_shared_fs_data_layer(cfg)
    else:
        cfg = conf.set_s3_data_layer(cfg)

    model_dir = conf.data_layer_examples_path("data_layer_mnist_estimator")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_pytorch_cifar10_const() -> None:
    """Run cifar10_cnn_pytorch for 2 steps and load the latest checkpoint onto the CPU.

    The loaded object is asserted to be a ``torch.nn.Module``.
    """
    yaml_path = conf.official_examples_path("cifar10_cnn_pytorch/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_steps(cfg, 2)

    model_dir = conf.official_examples_path("cifar10_cnn_pytorch")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)

    # Resolve the first trial through the Determined client and load its newest checkpoint.
    client = Determined(conf.make_master_url())
    trial_ref = client.get_trial(trial_list[0].id)
    newest_checkpoint = trial_ref.select_checkpoint(latest=True)
    loaded_model = newest_checkpoint.load(map_location=torch.device("cpu"))
    assert isinstance(loaded_model, torch.nn.Module)
def test_pytorch_load(collect_trial_profiles: Callable[[int], None]) -> None:
    """Run mnist_pytorch with profiling enabled and load the experiment's top checkpoint.

    Args:
        collect_trial_profiles: Callback invoked with the id of the first trial
            after the checkpoint has been loaded.
    """
    yaml_path = conf.fixtures_path("mnist_pytorch/const-pytorch11.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_profiling_enabled(cfg)

    model_dir = conf.tutorials_path("mnist_pytorch")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    # The loaded object is discarded; the call only has to complete without raising.
    client = Determined(conf.make_master_url())
    client.get_experiment(exp_id).top_checkpoint().load(map_location="cpu")

    first_trial = exp.experiment_trials(exp_id)[0]
    collect_trial_profiles(first_trial.trial.id)
def test_tensorpack_parallel(aggregation_frequency: int) -> None:
    """Run the mnist_tp trial example on 8 slots for 32 batches and expect a single trial.

    Args:
        aggregation_frequency: Value passed to ``conf.set_aggregation_frequency``.
    """
    cfg = conf.load_config(conf.official_examples_path("trial/mnist_tp/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_length(cfg, {"batches": 32})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)

    model_dir = conf.official_examples_path("trial/mnist_tp")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_distributed_logging() -> None:
    """Run pytorch_no_op on 8 slots and check each rank's train_batch log line.

    For every rank in ``range(slots_per_trial)`` the first trial's logs must
    contain "finished train_batch for rank <rank>".
    """
    cfg = conf.load_config(conf.fixtures_path("pytorch_no_op/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 1})

    model_dir = conf.fixtures_path("pytorch_no_op")
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    trial_id = exp.experiment_trials(experiment_id)[0]["id"]

    slot_count = cfg["resources"]["slots_per_trial"]
    for rank in range(slot_count):
        expected_line = "finished train_batch for rank {}".format(rank)
        assert exp.check_if_string_present_in_trial_logs(trial_id, expected_line)
def test_tf_keras_native_parallel(tf2: bool) -> None:
    """Run cifar10_cnn_tf_keras on 8 slots with native parallel enabled.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
    """
    yaml_path = conf.official_examples_path("trial/cifar10_cnn_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    model_dir = conf.official_examples_path("trial/cifar10_cnn_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_pytorch_gan_parallel() -> None:
    """Run gan_mnist_pytorch on 8 slots for 200 batches and load the latest checkpoint.

    The checkpoint is loaded with ``map_location="cpu"``; the loaded object is
    not inspected.
    """
    cfg = conf.load_config(conf.gan_examples_path("gan_mnist_pytorch/const.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_slots_per_trial(cfg, 8)

    model_dir = conf.gan_examples_path("gan_mnist_pytorch")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)

    # The loaded object is discarded; the call only has to complete without raising.
    client = Determined(conf.make_master_url())
    trial_ref = client.get_trial(trial_list[0]["id"])
    trial_ref.select_checkpoint(latest=True).load(map_location="cpu")
def test_fail_on_preclose_chechpoint_save() -> None:
    """Run the no_op fixture configured to fail on checkpoint save.

    The message stored in the ``fail_on_chechpoint_save`` hyperparameter is
    also handed to ``exp.run_failure_test_with_temp_config`` as its third
    argument.
    """
    failure_message = "failed on checkpoint save"

    cfg = conf.load_config(conf.fixtures_path("no_op/single.yaml"))
    # NOTE: the misspelled key is runtime data and is kept exactly as written.
    cfg["hyperparameters"]["fail_on_chechpoint_save"] = failure_message
    cfg["searcher"]["max_length"] = {"batches": 1}
    cfg["min_validation_period"] = {"batches": 1}
    cfg["max_restarts"] = 1

    model_dir = conf.fixtures_path("no_op")
    exp.run_failure_test_with_temp_config(cfg, model_dir, failure_message)
def run_mnist_estimator_data_layer_test(tf2: bool, storage_type: str) -> None:
    """Run the experimental data_layer_mnist_estimator example for 2 steps.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
        storage_type: "lfs" applies the shared-fs data layer setter; any other
            value applies the S3 data layer setter.
    """
    yaml_path = conf.experimental_path("data_layer_mnist_estimator/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_steps(cfg, 2)

    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    use_shared_fs = storage_type == "lfs"
    if use_shared_fs:
        cfg = conf.set_shared_fs_data_layer(cfg)
    else:
        cfg = conf.set_s3_data_layer(cfg)

    model_dir = conf.experimental_path("data_layer_mnist_estimator")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """Run cifar10_cnn_tf_keras for 2 steps on 8 slots and expect a single trial.

    Native parallel is disabled.

    Args:
        aggregation_frequency: Value passed to ``conf.set_aggregation_frequency``.
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
    """
    yaml_path = conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    model_dir = conf.official_examples_path("cifar10_cnn_tf_keras")
    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)

    trial_list = exp.experiment_trials(exp_id)
    assert len(trial_list) == 1
def test_mnist_estimator_data_layer_parallel(storage_type: str) -> None:
    """Run data_layer_mnist_estimator for 2 steps on 8 slots with the TF1 image.

    Args:
        storage_type: "lfs" applies the shared-fs data layer setter; any other
            value applies the S3 data layer setter.
    """
    yaml_path = conf.experimental_path("data_layer_mnist_estimator/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_tf1_image(cfg)

    use_shared_fs = storage_type == "lfs"
    if use_shared_fs:
        cfg = conf.set_shared_fs_data_layer(cfg)
    else:
        cfg = conf.set_s3_data_layer(cfg)

    model_dir = conf.experimental_path("data_layer_mnist_estimator")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_noop_experiment_config_override() -> None:
    """Create a no_op experiment with a ``--config`` override and check it took effect.

    The loaded config is dumped to a temporary YAML file, the experiment is
    created with ``reproducibility.experiment_seed=8200`` as an extra argument,
    and the resulting experiment config is asserted to carry seed 8200. The
    experiment is cancelled afterwards.
    """
    base_config = conf.load_config(conf.fixtures_path("no_op/single-one-short-step.yaml"))

    with tempfile.NamedTemporaryFile() as tmp_file:
        # Write through a separate handle so the YAML is flushed and closed
        # before the experiment is created from the same path.
        with open(tmp_file.name, "w") as yaml_out:
            yaml.dump(base_config, yaml_out)

        override_args = ["--config", "reproducibility.experiment_seed=8200"]
        exp_id = exp.create_experiment(
            tmp_file.name, conf.fixtures_path("no_op"), override_args
        )

        effective_config = exp.experiment_config_json(exp_id)
        assert effective_config["reproducibility"]["experiment_seed"] == 8200
        exp.cancel_single(exp_id)
def run_tf_keras_mnist_data_layer_test(tf2: bool, storage_type: str) -> None:
    """Run data_layer_mnist_tf_keras for 200 batches with the chosen data layer.

    The minimum validation period is set to 1000 batches.

    Args:
        tf2: When true the TF2 image setter is applied to the config, otherwise
            the TF1 image setter.
        storage_type: "lfs" applies the shared-fs data layer setter; any other
            value applies the S3 data layer setter.
    """
    yaml_path = conf.features_examples_path("data_layer_mnist_tf_keras/const.yaml")
    cfg = conf.load_config(yaml_path)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_min_validation_period(cfg, {"batches": 1000})

    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    use_shared_fs = storage_type == "lfs"
    if use_shared_fs:
        cfg = conf.set_shared_fs_data_layer(cfg)
    else:
        cfg = conf.set_s3_data_layer(cfg)

    model_dir = conf.features_examples_path("data_layer_mnist_tf_keras")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """Run the mnist_pytorch official example for 2 steps on 8 slots.

    Native parallel is disabled.

    Args:
        aggregation_frequency: Value passed to ``conf.set_aggregation_frequency``.
        use_amp: When true, the AMP level is set to "O1".
    """
    cfg = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if use_amp:
        cfg = conf.set_amp_level(cfg, "O1")

    model_dir = conf.official_examples_path("mnist_pytorch")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)