def test_get_experiment_id_from_env():
    """_get_experiment_id_from_env resolves the experiment from env variables.

    When both the experiment-name and experiment-id variables are set, the
    name takes precedence.
    """
    # When no env variables are set
    HelperEnv.assert_values(None, None)
    assert _get_experiment_id_from_env() is None

    # set only ID
    # BUG FIX: random.randint requires integer bounds; the float literal 1e6
    # raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
    random_id = random.randint(1, 10 ** 6)
    HelperEnv.set_values(experiment_id=random_id)
    HelperEnv.assert_values(str(random_id), None)
    assert _get_experiment_id_from_env() == str(random_id)

    # set only name
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        HelperEnv.set_values(name=name)
        HelperEnv.assert_values(None, name)
        assert _get_experiment_id_from_env() == exp_id

    # set both: assert that name variable takes precedence
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        random_id = random.randint(1, 10 ** 6)
        HelperEnv.set_values(name=name, experiment_id=random_id)
        HelperEnv.assert_values(str(random_id), name)
        assert _get_experiment_id_from_env() == exp_id
def test_create_experiments_with_bad_names():
    """Invalid experiment names (None, empty string) raise MlflowException."""
    # BUG FIX: pytest's ExceptionInfo object has no `.message` attribute and
    # strings have no `.contains` method, so the original assertions raised
    # AttributeError instead of checking anything. Inspect the raised
    # exception through `e.value` and use the `in` operator.

    # None for name
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment(None)
    assert "Invalid experiment name: 'None'" in str(e.value)

    # empty string name
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment("")
    assert "Invalid experiment name: ''" in str(e.value)
def test_create_experiment_with_duplicate_name():
    """An experiment name cannot be reused, even after the owner is deleted."""
    name = "popular_name"
    first_exp_id = kiwi.create_experiment(name)

    # Creating a second experiment with the same name must be refused.
    with pytest.raises(MlflowException):
        kiwi.create_experiment(name)

    # Deleting the original experiment does not release the name.
    tracking.MlflowClient().delete_experiment(first_exp_id)
    with pytest.raises(MlflowException):
        kiwi.create_experiment(name)
def test_get_experiment_by_name():
    """get_experiment_by_name returns the experiment created under that name."""
    with TempDir(chdr=True):
        # BUG FIX: random.randint requires integer bounds; the float literal
        # 1e6 raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
        name = "Random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)
        experiment = kiwi.get_experiment_by_name(name)
        assert experiment.experiment_id == exp_id
def test_list_experiments():
    """list_experiments honors the requested lifecycle view type."""

    def _assert_exps(expected, view_type_arg):
        # Compare (experiment_id, lifecycle_stage) pairs as sets — listing
        # order is irrelevant to the test.
        listed = {
            (exp.experiment_id, exp.lifecycle_stage)
            for exp in client.list_experiments(view_type=view_type_arg)
        }
        assert listed == set(expected.items())

    experiment_id = kiwi.create_experiment("exp_1")
    assert experiment_id == '1'
    client = tracking.MlflowClient()

    both_active = {'0': LifecycleStage.ACTIVE, '1': LifecycleStage.ACTIVE}
    _assert_exps(both_active, ViewType.ACTIVE_ONLY)
    _assert_exps(both_active, ViewType.ALL)
    _assert_exps({}, ViewType.DELETED_ONLY)

    client.delete_experiment(experiment_id)

    # After deletion, exp_1 moves from the active view into the deleted view.
    _assert_exps({'0': LifecycleStage.ACTIVE}, ViewType.ACTIVE_ONLY)
    _assert_exps(
        {'0': LifecycleStage.ACTIVE, '1': LifecycleStage.DELETED},
        ViewType.ALL)
    _assert_exps({'1': LifecycleStage.DELETED}, ViewType.DELETED_ONLY)
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id():
    """_get_experiment_id returns the experiment chosen via set_experiment."""
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        # BUG FIX: random.randint requires integer bounds; the float literal
        # 1e6 raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
        name = "Random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        kiwi.set_experiment(name)
        assert _get_experiment_id() == exp_id
def test_runs_artifact_repo_init():
    """A runs:/ URI resolves to the run's underlying S3 artifact repository."""
    artifact_location = "s3://blah_bucket/"
    experiment_id = kiwi.create_experiment("expr_abc", artifact_location)
    with kiwi.start_run(experiment_id=experiment_id):
        run_id = kiwi.active_run().info.run_id
        runs_uri = "runs:/%s/path/to/model" % run_id
        repo = RunsArtifactRepository(runs_uri)

        # The wrapper keeps the runs:/ URI itself, while its inner repo is an
        # S3-backed repository pointing at the run's absolute artifact path.
        assert repo.artifact_uri == runs_uri
        assert isinstance(repo.repo, S3ArtifactRepository)
        absolute_uri = "%s%s/artifacts/path/to/model" % (
            artifact_location, run_id)
        assert repo.repo.artifact_uri == absolute_uri
def test_runs_artifact_repo_uses_repo_download_artifacts():
    """RunsArtifactRepository delegates download_artifacts to its inner repo."""
    artifact_location = "s3://blah_bucket/"
    experiment_id = kiwi.create_experiment("expr_abcd", artifact_location)
    with kiwi.start_run(experiment_id=experiment_id):
        run_id = kiwi.active_run().info.run_id
        runs_repo = RunsArtifactRepository('runs:/{}'.format(run_id))
        # Swap the inner repository for a mock so the delegation itself can
        # be observed without touching S3.
        runs_repo.repo = Mock()
        runs_repo.download_artifacts('artifact_path', 'dst_path')
        runs_repo.repo.download_artifacts.assert_called_once()
def test_create_experiment():
    """create_experiment validates its name argument and returns an id."""
    with pytest.raises(TypeError):
        kiwi.create_experiment()  # pylint: disable=no-value-for-parameter

    with pytest.raises(Exception):
        kiwi.create_experiment(None)

    with pytest.raises(Exception):
        kiwi.create_experiment("")

    # BUG FIX: random.randint requires integer bounds; the float literal 1e6
    # raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
    exp_id = kiwi.create_experiment(
        "Some random experiment name %d" % random.randint(1, 10 ** 6))
    assert exp_id is not None
def test_get_experiment_id_in_databricks_with_experiment_defined_in_env_returns_env_experiment_id():
    """In a (mocked) Databricks notebook, the env experiment id wins over the
    notebook id."""
    with TempDir(chdr=True):
        # BUG FIX: random.randint requires integer bounds; the float literal
        # 1e6 raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
        exp_name = "random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(exp_name)
        # Derive a notebook id guaranteed to differ from the env experiment id.
        notebook_id = str(int(exp_id) + 73)
        HelperEnv.set_values(experiment_id=exp_id)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock,\
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            notebook_detection_mock.side_effect = lambda *args, **kwargs: True
            notebook_id_mock.side_effect = lambda *args, **kwargs: notebook_id

            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_with_active_experiment_returns_active_experiment_id():
    """In a (mocked) Databricks notebook, an explicitly activated experiment
    wins over the notebook id."""
    with TempDir(chdr=True):
        # BUG FIX: random.randint requires integer bounds; the float literal
        # 1e6 raises TypeError on Python >= 3.12, so use 10 ** 6 instead.
        exp_name = "random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(exp_name)
        kiwi.set_experiment(exp_name)
        # Derive a notebook id guaranteed to differ from the active experiment.
        notebook_id = str(int(exp_id) + 73)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock,\
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            notebook_detection_mock.return_value = True
            notebook_id_mock.return_value = notebook_id

            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_search_runs_multiple_experiments():
    """search_runs can match runs across several experiments at once."""
    experiment_ids = [
        kiwi.create_experiment("exp__{}".format(exp_id))
        for exp_id in range(1, 4)
    ]

    # Each run logs a shared metric m0 plus one metric whose name embeds its
    # own experiment id (m_<eid>), so filters can target one or all runs.
    for eid in experiment_ids:
        with kiwi.start_run(experiment_id=eid):
            kiwi.log_metric("m0", 1)
            kiwi.log_metric("m_{}".format(eid), 2)

    def _hits(filter_string):
        return len(MlflowClient().search_runs(
            experiment_ids, filter_string, ViewType.ALL))

    assert _hits("metrics.m0 > 0") == 3
    assert _hits("metrics.m_1 > 0") == 1
    assert _hits("metrics.m_2 = 2") == 1
    assert _hits("metrics.m_3 < 4") == 1
def test_set_experiment():
    """set_experiment validates its argument and routes subsequent runs."""
    # Invalid arguments are rejected up front.
    with pytest.raises(TypeError):
        kiwi.set_experiment()  # pylint: disable=no-value-for-parameter
    with pytest.raises(Exception):
        kiwi.set_experiment(None)
    with pytest.raises(Exception):
        kiwi.set_experiment("")

    # Runs started after set_experiment land in that experiment.
    first_name = "random_exp"
    first_exp_id = kiwi.create_experiment(first_name)
    kiwi.set_experiment(first_name)
    with start_run() as run:
        assert run.info.experiment_id == first_exp_id

    # Switching to a different experiment redirects subsequent runs there.
    another_name = "another_experiment"
    kiwi.set_experiment(another_name)
    another_exp = kiwi.tracking.MlflowClient().get_experiment_by_name(
        another_name)
    with start_run() as another_run:
        assert another_run.info.experiment_id == another_exp.experiment_id
def test_fetch_create_and_log(tmpdir):
    """Fetching/validating a project and creating its run sets the expected
    tags and params on the run."""
    entry_point_name = "entry_point"
    parameters = {
        "method_name": "string",
    }
    entry_point = _project_spec.EntryPoint(
        entry_point_name, parameters, "run_model.sh")
    mock_fetched_project = _project_spec.Project(
        None, {entry_point_name: entry_point}, None, "my_project")
    experiment_id = kiwi.create_experiment("test_fetch_project")
    expected_dir = tmpdir
    project_uri = "http://someuri/myproject.git"
    user_param = {"method_name": "newton"}

    # Stub out fetching and loading so no network or filesystem project is
    # needed.
    with mock.patch("mlflow.projects.utils._fetch_project",
                    return_value=expected_dir), \
            mock.patch("mlflow.projects._project_spec.load_project",
                       return_value=mock_fetched_project):
        work_dir = fetch_and_validate_project(
            "", "", entry_point_name, user_param)
        project = load_project(work_dir)
        assert mock_fetched_project == project
        assert expected_dir == work_dir

        # Create a run
        active_run = get_or_create_run(
            run_id=None, uri=project_uri, experiment_id=experiment_id,
            work_dir=work_dir, version=None, entry_point=entry_point_name,
            parameters=user_param)

        # check tags
        run = kiwi.get_run(active_run.info.run_id)
        assert MLFLOW_PROJECT_ENTRY_POINT in run.data.tags
        assert MLFLOW_SOURCE_NAME in run.data.tags
        assert entry_point_name == run.data.tags[MLFLOW_PROJECT_ENTRY_POINT]
        assert project_uri == run.data.tags[MLFLOW_SOURCE_NAME]
        assert user_param == run.data.params
def test_model_log():
    """Model.log records flavors, signature and input example, and the logged
    model can be downloaded and reloaded intact."""
    with TempDir(chdr=True) as tmp:
        experiment_id = kiwi.create_experiment("test")
        expected_signature = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]))
        input_example = {"x": 1, "y": 2}

        with kiwi.start_run(experiment_id=experiment_id) as r:
            Model.log("some/path", TestFlavor,
                      signature=expected_signature,
                      input_example=input_example)

        # Download the logged model and reload its MLmodel metadata.
        local_path = _download_artifact_from_uri(
            "runs:/{}/some/path".format(r.info.run_id),
            output_path=tmp.path(""))
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.signature == expected_signature

        # The saved input example round-trips through its JSON artifact.
        example_path = os.path.join(
            local_path,
            loaded_model.saved_input_example_info["artifact_path"])
        restored = _dataframe_from_json(example_path)
        assert restored.to_dict(orient="records")[0] == input_example
def run(original="data/processed/jokes.json",
        replaced="data/processed/dynamic_template_jokes.json"):
    """Train and evaluate a RankedNetworkCNNModule over a hyperparameter
    search, tracking datasets, params, metrics and artifacts with kiwi.

    :param original: path to the original jokes dataset.
    :param replaced: path to the dynamic-template jokes dataset.
    """
    dataset = RankSequenceDataset(
        original=original,
        replaced=replaced,
        embeddings_path="./data/raw/roularta-160.txt")

    # Create 70/15/15 train/val/test splits with a fixed seed so runs are
    # reproducible.
    train_size = int(0.7 * len(dataset))
    val_size = int((len(dataset) - train_size) / 2)
    test_size = len(dataset) - train_size - val_size
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(1))

    # Create pytorch dataloaders
    train_loader = DataLoader(train_dataset, batch_size=64, num_workers=8)
    val_loader = DataLoader(val_dataset, batch_size=64, num_workers=8)
    test_loader = DataLoader(test_dataset, batch_size=64, num_workers=8)

    # Register dataset with kiwi; the experiment is named by the current
    # timestamp.
    current_experiment_id = kiwi.create_experiment(
        str(datetime.datetime.now()))
    kiwi.register_training_dataset(dataloader=train_dataset,
                                   dataset_location=original,
                                   experiment_id=current_experiment_id)
    kiwi.register_dev_dataset(dataloader=val_dataset,
                              dataset_location=original,
                              experiment_id=current_experiment_id)
    kiwi.register_test_dataset(dataloader=test_dataset,
                               dataset_location=original,
                               experiment_id=current_experiment_id)

    def objective(args):
        # One hyperparameter-evaluation: train a model with `args`, evaluate
        # on the held-out test set, and return overall accuracy.
        with kiwi.start_run(experiment_id=current_experiment_id):
            # register hyperparams
            for key, value in args.items():
                kiwi.log_param(key, value)

            # Define model
            model = RankedNetworkCNNModule(args['learning_rate'],
                                           dataset.get_embeddings(),
                                           hidden_dim=args['hidden'],
                                           output_labels=2)

            # Train (obviously)
            trainer = pl.Trainer(max_epochs=15, logger=KiwiLogger())
            trainer.fit(model, train_loader, val_loader)

            # Evaluation on held-out test-set
            with torch.no_grad():
                model.eval()
                results = pd.DataFrame(columns=['labels', 'predictions'])
                for batch_idx, batch in enumerate(test_loader):
                    y_hat = model(batch['a'], batch['b'])
                    # BUG FIX: DataFrame.append was removed in pandas 2.0;
                    # use pd.concat instead.
                    results = pd.concat(
                        [results,
                         pd.DataFrame({
                             'labels': batch['label'].flatten(),
                             'predictions': y_hat.detach().argmax(axis=1)
                         })],
                        ignore_index=True)
                # NOTE: the original called results.to_csv() with no path,
                # which only returns a string that was discarded — removed.

                # With a nice confusion matrix
                confusion_matrix(y_pred=results['predictions'].values,
                                 y_true=results['labels'].values,
                                 classes=[0, 1])
                cm = ConfusionMatrix(
                    actual_vector=results['labels'].values,
                    predict_vector=results['predictions'].values)

                output_test_results = "cm.txt"
                cm.save_stat(output_test_results)

                output_test_predictions_file = "test_predictions.txt"
                np.savetxt(output_test_predictions_file,
                           results['predictions'].values,
                           delimiter=",")

                kiwi.log_metric(key="test_acc", value=cm.Overall_ACC)
                kiwi.log_metric(key="test_f1_micro", value=cm.F1_Micro)
                kiwi.log_metric(key="test_f1_macro", value=cm.F1_Macro)
                # BUG FIX: this metric was logged twice with the identical
                # value; the duplicate call has been removed.
                kiwi.log_metric(key="test_ci_pm",
                                value=cm.CI95[1] - cm.Overall_ACC)

                kiwi.log_artifact(output_test_predictions_file)
                kiwi.log_artifact(output_test_results + ".pycm")

            return cm.Overall_ACC

    space = {
        'learning_rate': ("range", [1e-3, 1e-1]),
        # 'batch_size': ("choice", [4, 8, 16, 32, 64, 128]),
        'hidden': ("choice", [16])
    }

    kiwi.start_experiment(current_experiment_id,
                          hp_space=space,
                          objective=objective,
                          max_evals=10,
                          mode="random")
def test_create_experiments_with_bad_name_types(name):
    """Non-string experiment names raise MlflowException (parametrized on
    `name`)."""
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment(name)
    # BUG FIX: pytest's ExceptionInfo has no `.message` attribute and strings
    # have no `.contains` method; check the exception text via `e.value`.
    assert ("Invalid experiment name: %s. Expects a string." % name
            in str(e.value))