def test_repository(self, client, client_2, organization, created_entities):
    """The above, but for repository.

    Because there is no client.create_repository() or
    client.get_repository().
    """
    organization.add_member(client_2._conn.email)
    client.set_workspace(organization.name)
    client_2.set_workspace(organization.name)

    # private: another org member must not be able to fetch it
    private_repo = client.set_repository(
        _utils.generate_default_name(), visibility=Private(),
    )
    created_entities.append(private_repo)
    with pytest.raises(Exception, match="unable to get Repository"):
        client_2.set_repository(private_repo.name)

    # read-only: fetchable by the other member, but not deletable
    read_repo = client.set_repository(
        _utils.generate_default_name(), visibility=OrgCustom(write=False),
    )
    created_entities.append(read_repo)
    retrieved_repo = client_2.set_repository(read_repo.name)
    assert retrieved_repo.id == read_repo.id
    with pytest.raises(requests.HTTPError, match="Access Denied|Forbidden"):
        retrieved_repo.delete()

    # read-write: the other member may delete it outright
    write_repo = client.set_repository(
        _utils.generate_default_name(), visibility=OrgCustom(write=True),
    )
    try:
        retrieved_repo = client_2.set_repository(write_repo.name)
        retrieved_repo.delete()
    # FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; narrow to Exception while keeping the intent
    # (if deletion didn't happen, register the repo for fixture cleanup).
    except Exception:
        created_entities.append(write_repo)
def test_add_notification_channels(self, client, summary, created_entities):
    """Channels attached at creation and added afterwards are both persisted."""
    alert_name = _utils.generate_default_name()
    alerter = FixedAlerter(comparison.GreaterThan(0.7))

    # two throwaway Slack channels, both registered for cleanup
    channels = []
    for _ in range(2):
        channel = client.monitoring.notification_channels.create(
            _utils.generate_default_name(),
            SlackNotificationChannel(_utils.generate_default_name()),
        )
        created_entities.append(channel)
        channels.append(channel)
    initial_channel, extra_channel = channels

    alert = summary.alerts.create(
        alert_name,
        alerter,
        notification_channels=[initial_channel],
    )
    assert set(alert._msg.notification_channels.keys()) == {initial_channel.id}

    alert.add_notification_channels([extra_channel])
    alert._refresh_cache()
    assert set(alert._msg.notification_channels.keys()) == {
        initial_channel.id,
        extra_channel.id,
    }
def test_run_log_commit(self, client_2, client_3, organization, created_entities):
    """Log someone else's commit to my run."""
    organization.add_member(client_2._conn.email)
    organization.add_member(client_3._conn.email)
    client_2.set_workspace(organization.name)
    client_3.set_workspace(organization.name)

    created_entities.append(client_2.create_project())
    run = client_2.create_experiment_run()

    # a private commit from another member cannot be logged
    private_repo = client_3.set_repository(
        _utils.generate_default_name(), visibility=Private(),
    )
    created_entities.append(private_repo)
    private_commit = private_repo.get_commit()
    with pytest.raises(requests.HTTPError, match="Access Denied|Forbidden"):
        run.log_commit(private_commit)

    # an org-visible commit can be logged and read back
    org_repo = client_3.set_repository(_utils.generate_default_name())
    created_entities.append(org_repo)
    org_commit = org_repo.get_commit()
    run.log_commit(org_commit)
    assert run.get_commit()[0].id == org_commit.id
def test_profiler_crud(self, client):
    """Create, read, update, and delete a profiler reference."""
    requirements = ["numpy", "scipy", "pandas"]
    for req in requirements:
        pytest.importorskip(req)

    profilers = client.operations.profilers
    profiler_name = "age_column_profiler_{}".format(generate_default_name())
    python_env = Python(requirements=requirements)

    created_profiler = profilers.upload(
        profiler_name,
        ContinuousHistogramProfiler(columns=["age"]),
        environment=python_env,
    )
    assert isinstance(created_profiler, ProfilerReference)

    retrieved_profiler = profilers.get(created_profiler.id)
    assert isinstance(retrieved_profiler, ProfilerReference)
    assert created_profiler.id == retrieved_profiler.id

    listed_profilers = profilers.list()
    # FIX: was `> 1`, which assumed some other profiler already existed and
    # made the test flaky on a clean backend; we only created one profiler,
    # so `>= 1` plus the membership check below is the correct assertion.
    assert len(listed_profilers) >= 1
    assert created_profiler.id in map(lambda p: p.id, listed_profilers)

    old_name = created_profiler.name
    old_profiler_version = created_profiler.reference
    new_name = "profiler2_{}".format(generate_default_name())
    created_profiler.update(new_name)
    assert created_profiler.name == new_name
    assert created_profiler.name != old_name
    # renaming must not touch the underlying reference version
    assert old_profiler_version == created_profiler.reference

    delete = profilers.delete(created_profiler)
    assert delete
def config_file_with_type_util(self, connect):
    """Verify that Client picks up credentials and entity names from a config file.

    Temporarily removes the VERTA_* environment variables so the client is
    forced to read ``verta_config.json`` instead, then checks connection
    attributes and (if ``connect`` is truthy) entity resolution against the
    names written into the config.  The environment is restored afterwards.
    """
    PROJECT_NAME = _utils.generate_default_name()
    DATASET_NAME = _utils.generate_default_name()
    EXPERIMENT_NAME = _utils.generate_default_name()
    CONFIG_FILENAME = "verta_config.json"
    HOST_KEY, EMAIL_KEY, DEV_KEY_KEY = "VERTA_HOST", "VERTA_EMAIL", "VERTA_DEV_KEY"
    # save current credentials so they can be restored in the outer finally
    HOST, EMAIL, DEV_KEY = os.environ[HOST_KEY], os.environ[EMAIL_KEY], os.environ[DEV_KEY_KEY]
    try:
        # env vars take priority over the config file, so drop them first
        del os.environ[HOST_KEY], os.environ[EMAIL_KEY], os.environ[DEV_KEY_KEY]
        try:
            # write a config file carrying both credentials and entity names
            with open(CONFIG_FILENAME, 'w') as f:
                json.dump(
                    {
                        'host': HOST,
                        'email': EMAIL,
                        'dev_key': DEV_KEY,
                        'project': PROJECT_NAME,
                        'experiment': EXPERIMENT_NAME,
                        'dataset': DATASET_NAME,
                    },
                    f,
                )
            client = verta.Client(_connect=connect)
            conn = client._conn
            back_end_url = urlparse(HOST)
            socket = back_end_url.netloc + back_end_url.path.rstrip('/')
            assert conn.socket == socket
            assert conn.auth['Grpc-Metadata-email'] == EMAIL
            # both underscore and hyphen header variants must carry the dev key
            assert conn.auth['Grpc-Metadata-developer_key'] == DEV_KEY
            assert conn.auth['Grpc-Metadata-developer-key'] == DEV_KEY
            if connect:
                # online mode: entities are actually created on the backend,
                # so delete them even if an assertion fails
                try:
                    assert client.set_experiment_run()
                    assert client.proj.name == PROJECT_NAME
                    assert client.expt.name == EXPERIMENT_NAME
                finally:
                    if client.proj is not None:
                        client.proj.delete()
                dataset = client.set_dataset()
                try:
                    assert dataset.name == DATASET_NAME
                finally:
                    dataset.delete()
            else:
                # offline mode: only check that names resolve from the config
                assert client._set_from_config_if_none(None, "project") == PROJECT_NAME
                assert client._set_from_config_if_none(None, "experiment") == EXPERIMENT_NAME
                assert client._set_from_config_if_none(None, "dataset") == DATASET_NAME
        finally:
            # remove the config file so it cannot leak into other tests
            if os.path.exists(CONFIG_FILENAME):
                os.remove(CONFIG_FILENAME)
    finally:
        # restore the original environment no matter what happened above
        os.environ[HOST_KEY], os.environ[EMAIL_KEY], os.environ[DEV_KEY_KEY] = HOST, EMAIL, DEV_KEY
def test_get_by_id(self, client, created_entities):
    """Fetching by id returns that endpoint, not the most recently created one."""
    first_path = _utils.generate_default_name()
    second_path = _utils.generate_default_name()

    target = client.set_endpoint(first_path)
    created_entities.append(target)

    # create a newer endpoint so a buggy "fetch latest" would return it instead
    decoy = client.set_endpoint(second_path)
    created_entities.append(decoy)

    assert client.set_endpoint(id=target.id).id == target.id
def test_path_ensure_starts_with_slash(self, client, created_entities):
    """Endpoint paths are normalized to carry a leading slash."""
    # a bare path gets a slash prepended
    bare_path = _utils.generate_default_name()
    endpoint = client.create_endpoint(bare_path)
    created_entities.append(endpoint)
    assert endpoint.path == "/" + bare_path

    # an already-slashed path is left untouched
    slashed_path = "/" + _utils.generate_default_name()
    endpoint = client.create_endpoint(slashed_path)
    created_entities.append(endpoint)
    assert endpoint.path == slashed_path
def test_crud(self, client, summary, summary_sample):
    """Create, fetch, list, and delete a reference alert on a summary."""
    alert_name = _utils.generate_default_name()
    alerter = ReferenceAlerter(comparison.GreaterThan(0.7), summary_sample)

    created_alert = summary.alerts.create(alert_name, alerter)
    assert isinstance(created_alert, Alert)
    assert created_alert._msg.alerter_type == alerter._TYPE
    assert created_alert.monitored_entity_id == summary.monitored_entity_id
    assert summary.id in created_alert.summary_sample_query.summary_query._ids

    # the same alert is reachable via the summary and via the client
    retrieved_alert = summary.alerts.get(id=created_alert.id)
    client_retrieved_alert = client.monitoring.alerts.get(id=created_alert.id)
    assert retrieved_alert.id == client_retrieved_alert.id
    assert isinstance(retrieved_alert, Alert)
    assert retrieved_alert._msg.alerter_type == alerter._TYPE
    assert retrieved_alert.alerter._as_proto() == alerter._as_proto()
    assert retrieved_alert.alerter._reference_sample_id == summary_sample.id

    # both list() entry points include the new alert
    assert created_alert.id in (a.id for a in summary.alerts.list())
    assert created_alert.id in (a.id for a in client.monitoring.alerts.list())

    assert summary.alerts.delete([created_alert])
def test_create(self, client):
    """An organization can be created and then looked up by name."""
    org_name = _utils.generate_default_name()
    organization = client._create_organization(org_name)
    assert organization
    assert organization.id == client._get_organization(org_name).id
    organization.delete()
def test_crud(self, client, monitored_entity, summary_sample):
    """Create, fetch, list, and delete an alert on a monitored entity."""
    alerts = monitored_entity.alerts
    alert_name = _utils.generate_default_name()
    alerter = ReferenceAlerter(comparison.GreaterThan(0.7), summary_sample)
    sample_query = SummarySampleQuery()

    created_alert = alerts.create(alert_name, alerter, sample_query)
    assert isinstance(created_alert, _entities.Alert)
    assert created_alert._msg.alerter_type == alerter._TYPE
    assert created_alert.monitored_entity_id == monitored_entity.id

    # retrieval via the entity and via the client yields the same alert
    retrieved_alert = alerts.get(id=created_alert.id)
    client_retrieved_alert = client.operations.alerts.get(id=created_alert.id)
    assert retrieved_alert.id == client_retrieved_alert.id
    assert isinstance(retrieved_alert, _entities.Alert)
    assert retrieved_alert._msg.alerter_type == alerter._TYPE
    assert retrieved_alert.alerter._as_proto() == alerter._as_proto()
    assert retrieved_alert.alerter._reference_sample_id == summary_sample.id

    # both listings include the new alert
    assert created_alert.id in (a.id for a in alerts.list())
    assert created_alert.id in (a.id for a in client.operations.alerts.list())

    assert alerts.delete([created_alert])
def test_top_level_entities(self, client, organization, created_entities, entity_name):
    """set_<entity>() respects both the default and an explicit workspace."""
    set_entity = getattr(client, "set_{}".format(entity_name))
    try:
        # no workspace argument -> lands in the account's default workspace
        default_ws_entity = set_entity(_utils.generate_default_name())
        created_entities.append(default_ws_entity)
        assert default_ws_entity.workspace == client._conn.get_default_workspace()

        # explicit workspace argument -> lands in the organization
        org_entity = set_entity(
            _utils.generate_default_name(), workspace=organization.name,
        )
        created_entities.append(org_entity)
        assert org_entity.workspace == organization.name
    finally:
        client._ctx.proj = None  # otherwise client teardown tries to delete
def test_crud_workspace(self, client, organization, strs, created_entities):
    """Channels with the same name may coexist in different workspaces."""
    strs = iter(strs)
    channel_name = _utils.generate_default_name()
    org_workspace = organization.name
    notification_channels = client.operations.notification_channels

    # personal-workspace channel
    personal_channel = notification_channels.create(
        channel_name,
        SlackNotificationChannel(next(strs)),
    )
    created_entities.append(personal_channel)
    assert personal_channel.workspace == client.get_workspace()
    assert personal_channel.id == notification_channels.get(channel_name).id
    assert personal_channel.id in (c.id for c in notification_channels.list())

    # same name, different workspace
    org_channel = notification_channels.create(
        channel_name,
        SlackNotificationChannel(next(strs)),
        workspace=org_workspace,
    )
    created_entities.append(org_channel)
    assert org_channel.workspace == org_workspace

    with pytest.warns(None) as record:
        fetched = notification_channels.get(channel_name, workspace=org_workspace)
        assert org_channel.id == fetched.id
    assert not record  # no warning of multiple channels found

    org_listing = notification_channels.list(workspace=org_workspace)
    assert len(org_listing) == 1
    assert org_listing[0].id == org_channel.id
def test_canary_update_endpoint_env_vars(self, client, created_endpoints, experiment_run, model_for_deployment):
    """CLI canary update accepts --env-vars alongside the canary options."""
    endpoint_name = _utils.generate_default_name()
    endpoint = client.set_endpoint(endpoint_name)
    created_endpoints.append(endpoint)
    builds_before = get_build_ids(endpoint.get_status())

    experiment_run.log_model(model_for_deployment['model'], custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    canary_rule = json.dumps({
        "rule": "latency_avg_max",
        "rule_parameters": [{"name": "threshold", "value": 0.8}],
    })

    result = CliRunner().invoke(
        cli,
        [
            'deployment', 'update', 'endpoint', endpoint_name,
            '--run-id', experiment_run.id,
            "-s", "canary",
            '-c', canary_rule,
            '-i', 1,
            "--canary-step", 0.3,
            '--env-vars', '{"VERTA_HOST": "app.verta.ai"}',
        ],
    )
    assert not result.exception

    # the update must have produced at least one build id not seen before
    builds_after = get_build_ids(endpoint.get_status())
    assert len(builds_after - builds_before) > 0
def test_endpoint_update_run(self, client_2, client_3, organization, created_entities):
    """Update endpoint from someone else's run."""
    LogisticRegression = pytest.importorskip("sklearn.linear_model").LogisticRegression

    organization.add_member(client_2._conn.email)
    organization.add_member(client_3._conn.email)
    client_2.set_workspace(organization.name)
    client_3.set_workspace(organization.name)

    endpoint = client_2.create_endpoint(_utils.generate_default_name())
    created_entities.append(endpoint)

    def make_run(visibility):
        # fresh project with the given visibility, holding one deployable run
        created_entities.append(client_3.create_project(visibility=visibility))
        run = client_3.create_experiment_run()
        run.log_model(LogisticRegression(), custom_modules=[])
        run.log_environment(Python(["scikit-learn"]))
        return run

    # private run: not usable by another member
    run = make_run(Private())
    with pytest.raises(requests.HTTPError, match="Access Denied|Forbidden"):
        endpoint.update(run)

    # org run without deploy permission: still rejected
    run = make_run(OrgCustom(deploy=False))
    with pytest.raises(requests.HTTPError, match="Access Denied|Forbidden"):
        endpoint.update(run)

    # org run with deploy permission: accepted
    run = make_run(OrgCustom(deploy=True))
    assert endpoint.update(run)
def test_configure_endpoint(self, client, model_version, strs):
    """Kafka settings can be set at creation, replaced on update, and cleared."""
    LogisticRegression = pytest.importorskip("sklearn.linear_model").LogisticRegression
    strs = iter(strs)

    model_version.log_model(LogisticRegression, custom_modules=[])
    model_version.log_environment(Python(["scikit-learn"]))

    # create with settings
    initial_settings = KafkaSettings(next(strs), next(strs), next(strs))
    endpoint = client.create_endpoint(
        _utils.generate_default_name(), kafka_settings=initial_settings,
    )
    assert endpoint.kafka_settings == initial_settings

    # replace settings on update
    replacement_settings = KafkaSettings(next(strs), next(strs), next(strs))
    endpoint.update(model_version, kafka_settings=replacement_settings)
    assert endpoint.kafka_settings == replacement_settings

    # clear settings via the False sentinel
    endpoint.update(model_version, kafka_settings=False)
    assert endpoint.kafka_settings is None
def test_get(self, client, created_endpoints, experiment_run, model_for_deployment):
    """`deployment get endpoint` shows metadata, and a real curl once deployed."""
    experiment_run.log_model(model_for_deployment['model'], custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    path = _utils.generate_default_name()
    endpoint = client.set_endpoint(path)
    created_endpoints.append(endpoint)

    runner = CliRunner()
    result = runner.invoke(cli, ['deployment', 'get', 'endpoint', path])
    assert not result.exception

    # every metadata field is rendered before deployment
    expected_fragments = (
        "path: {}".format(endpoint.path),
        "id: {}".format(endpoint.id),
        "curl: <Endpoint not deployed>",
        "status",
        "date created",
        "date updated",
        "stage's date created",
        "stage's date updated",
        "components",
    )
    for fragment in expected_fragments:
        assert fragment in result.output

    # after a waited direct update, the real curl command is shown instead
    endpoint.update(experiment_run, DirectUpdateStrategy(), True)
    result = runner.invoke(cli, ['deployment', 'get', 'endpoint', path])
    assert "curl: {}".format(endpoint.get_deployed_model().get_curl()) in result.output
def test_update_with_resources(self, client, created_endpoints, experiment_run, model_for_deployment):
    """CLI direct update forwards --resources into the update request."""
    endpoint_name = _utils.generate_default_name()
    endpoint = client.set_endpoint(endpoint_name)
    created_endpoints.append(endpoint)
    original_status = endpoint.get_status()
    original_build_ids = get_build_ids(original_status)

    experiment_run.log_model(model_for_deployment['model'], custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    resources = '{"cpu": 0.25, "memory": "100M"}'
    result = CliRunner().invoke(
        cli,
        [
            'deployment', 'update', 'endpoint', endpoint_name,
            '--run-id', experiment_run.id,
            "-s", "direct",
            '--resources', resources,
        ],
    )
    assert not result.exception

    # config is `cpu`, wire is `cpu_millis`
    expected_resources = Resources._from_dict(json.loads(resources))._as_dict()
    assert endpoint.get_update_status()['update_request']['resources'] == expected_resources
def test_predict(self, client, experiment_run, created_endpoints):
    """`deployment predict endpoint` returns the deployed model's predictions."""
    np = pytest.importorskip("numpy")
    sklearn = pytest.importorskip("sklearn")
    from sklearn.linear_model import LogisticRegression

    classifier = LogisticRegression()
    classifier.fit(np.random.random((36, 12)), np.random.random(36).round())
    test_data = np.random.random((4, 12))
    test_data_str = json.dumps(test_data.tolist())

    experiment_run.log_model(classifier, custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    path = _utils.generate_default_name()
    endpoint = client.set_endpoint(path)
    created_endpoints.append(endpoint)
    endpoint.update(experiment_run, DirectUpdateStrategy(), wait=True)

    result = CliRunner().invoke(
        cli,
        ['deployment', 'predict', 'endpoint', path, '--data', test_data_str],
    )
    assert not result.exception

    # the CLI output must contain the same predictions as the local model
    expected_output = json.dumps(classifier.predict(test_data).tolist())
    assert expected_output in result.output
def test_update_from_version(self, client, model_version, created_endpoints):
    """CLI direct update from a model version serves correct predictions."""
    np = pytest.importorskip("numpy")
    sklearn = pytest.importorskip("sklearn")
    from sklearn.linear_model import LogisticRegression

    classifier = LogisticRegression()
    classifier.fit(np.random.random((36, 12)), np.random.random(36).round())
    model_version.log_model(classifier)
    env = Python(requirements=["scikit-learn"])
    model_version.log_environment(env)

    path = _utils.generate_default_name()
    endpoint = client.set_endpoint(path)
    created_endpoints.append(endpoint)

    result = CliRunner().invoke(
        cli,
        [
            'deployment', 'update', 'endpoint', path,
            '--model-version-id', model_version.id,
            "--strategy", "direct",
        ],
    )
    assert not result.exception

    # FIX: the original `while not status == "active": time.sleep(3)` loop had
    # no timeout and could hang the whole suite forever if the build failed;
    # bound the wait and fail loudly instead.
    deadline = time.time() + 15 * 60  # 15-minute ceiling for the build
    while endpoint.get_status()['status'] != "active":
        if time.time() > deadline:
            raise TimeoutError("endpoint did not become active within 15 minutes")
        time.sleep(3)

    test_data = np.random.random((4, 12))
    assert np.array_equal(
        endpoint.get_deployed_model().predict(test_data),
        classifier.predict(test_data),
    )
def endpoint(client):
    """Yield a freshly created endpoint; delete it at teardown."""
    new_endpoint = client.create_endpoint(_utils.generate_default_name())
    yield new_endpoint
    new_endpoint.delete()
def test_direct_update_endpoint(self, client, created_endpoints, experiment_run, model_for_deployment):
    """CLI direct update triggers at least one new build."""
    endpoint_name = _utils.generate_default_name()
    endpoint = client.set_endpoint(endpoint_name)
    created_endpoints.append(endpoint)
    builds_before = get_build_ids(endpoint.get_status())

    experiment_run.log_model(model_for_deployment['model'], custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    result = CliRunner().invoke(
        cli,
        [
            'deployment', 'update', 'endpoint', endpoint_name,
            '--run-id', experiment_run.id,
            "--strategy", "direct",
        ],
    )
    assert not result.exception

    # at least one build id that wasn't there before the update
    builds_after = get_build_ids(endpoint.get_status())
    assert len(builds_after - builds_before) > 0
def organization(client):
    """Yield a throwaway organization; delete it at teardown."""
    org = client._create_organization(_utils.generate_default_name())
    yield org
    org.delete()
def repository(client):
    """Yield a throwaway repository; delete it at teardown."""
    repo = client.get_or_create_repository(_utils.generate_default_name())
    yield repo
    repo.delete()
def create_dummy_workspace():
    """Prevent tests from uncontrollably changing accounts' default workspace.

    When an account creates its first organization, or is added to its first
    organization, UAC sets that organization as the account's default
    workspace. This is undesired during test runs, because several tests rely
    on new arbitrary orgs *not* being the active client's default workspace.

    This fixture creates a dummy "first" organization for each account, so
    that organizations created for individual tests won't trigger this
    behavior from UAC.
    """
    dummy_orgs = []
    for client in clean_test_accounts.get_clients():
        # remember the default workspace so org creation can't silently change it
        original_default = client._conn.get_default_workspace()
        dummy_orgs.append(client._create_organization(_utils.generate_default_name()))
        client._conn._set_default_workspace(original_default)

    yield

    for org in dummy_orgs:
        org.delete()
def test_create_same_name_diff_workspace(self, client, organization, created_endpoints, created_registered_models, created_datasets):
    """Entities with identical names in different workspaces get distinct ids.

    NOTE(review): several calls below (`create_experiment`, `create_experiment_run`)
    take no workspace argument and presumably rely on the client's implicit
    project context set by the preceding `create_project` call — the statement
    order is therefore significant; confirm before reordering.
    """
    # creating some entities:
    project_name = _utils.generate_default_name()
    exp_name = _utils.generate_default_name()
    run_name = _utils.generate_default_name()
    dataset_name = _utils.generate_default_name()
    repository_name = _utils.generate_default_name()
    model_name = _utils.generate_default_name()
    version_name = _utils.generate_default_name()
    endpoint_path = _utils.generate_default_name()
    project = client.create_project(project_name)
    exp = client.create_experiment(exp_name)
    run = client.create_experiment_run(run_name)
    repository = client.get_or_create_repository(name=repository_name)
    dataset = client._create_dataset2(dataset_name)
    created_datasets.append(dataset)
    model = client.create_registered_model(name=model_name)
    version = model.create_version(name=version_name)
    created_registered_models.append(model)
    endpoint = client.create_endpoint(path=endpoint_path)
    created_endpoints.append(endpoint)
    # create entities with same name, but different workspace:
    new_model = client.create_registered_model(name=model_name, workspace=organization.name)
    new_version = new_model.create_version(name=version_name)
    # new_endpoint = client.create_endpoint(path=endpoint_path, workspace=organization.name) TODO: uncomment after VR-6053
    # TODO: remove following three lines after VR-6053; until then, endpoints with same name diff workspace is a 409
    with pytest.raises(requests.HTTPError) as excinfo:
        client.create_endpoint(path=endpoint_path, workspace=organization.name)
    assert excinfo.value.response.status_code == 409
    new_project = client.create_project(project_name, workspace=organization.name)
    new_exp = client.create_experiment(exp_name)
    new_run = client.create_experiment_run(run_name)
    new_repository = client.get_or_create_repository(name=repository_name, workspace=organization.name)
    new_dataset = client._create_dataset2(dataset_name,
                                          workspace=organization.name)
    created_datasets.append(new_dataset)
    # created_endpoints.append(new_endpoint) TODO: uncomment after VR-6053
    created_registered_models.append(new_model)
    # same names, different workspaces -> every pair must be a distinct entity
    assert model.id != new_model.id
    assert version.id != new_version.id
    # assert endpoint.id != new_endpoint.id TODO: uncomment after VR-6053
    assert project.id != new_project.id
    assert exp.id != new_exp.id
    assert run.id != new_run.id
    assert dataset.id != new_dataset.id
    assert repository.id != new_repository.id
    project.delete()  # have to delete manually because creating dataset makes project untracked by client context.
    new_project.delete()
    repository.delete()
def test_list_endpoint(self, created_endpoints):
    """`deployment list endpoint` includes every existing endpoint path."""
    client = Client()
    paths = [_utils.generate_default_name(), _utils.generate_default_name()]
    for endpoint_path in paths:
        created_endpoints.append(client.get_or_create_endpoint(endpoint_path))

    result = CliRunner().invoke(cli, ['deployment', 'list', 'endpoint'])
    assert not result.exception
    for endpoint_path in paths:
        assert endpoint_path in result.output
def summary(client, monitored_entity, created_entities):
    """Return a fresh numeric summary attached to ``monitored_entity``.

    NOTE(review): ``created_entities`` is accepted but never used, so the
    summary is never registered for teardown — confirm whether deleting the
    monitored entity cascades to its summaries, or whether this leaks.
    """
    summary = client.monitoring.summaries.create(
        _utils.generate_default_name(),
        data_types.NumericValue,
        monitored_entity,
    )
    return summary
def test_repr(self, summary, alerter):
    """__repr__() does not raise exceptions"""
    alert = summary.alerts.create(_utils.generate_default_name(), alerter)
    assert repr(alert)
    assert repr(summary.alerts.get(id=alert.id))
def test_update_from_json_config(self, client, in_tempdir, created_endpoints, experiment_run, model_for_deployment):
    """CLI canary update can read its whole strategy from a JSON config file."""
    # FIX: was `json = pytest.importorskip("json")` -- json is part of the
    # standard library, so the importorskip could never skip and only served
    # to shadow the module name; a plain import is equivalent and clearer.
    import json

    experiment_run.log_model(model_for_deployment['model'], custom_modules=[])
    experiment_run.log_requirements(['scikit-learn'])

    path = _utils.generate_default_name()
    endpoint = client.set_endpoint(path)
    created_endpoints.append(endpoint)
    original_build_ids = get_build_ids(endpoint.get_status())

    # canary strategy mirroring the CLI's flag-based options
    strategy_dict = {
        "run_id": experiment_run.id,
        "strategy": "canary",
        "canary_strategy": {
            "progress_step": 0.05,
            "progress_interval_seconds": 30,
            "rules": [
                {
                    "rule": "latency_avg_max",
                    "rule_parameters": [{"name": "threshold", "value": "0.1"}],
                },
                {
                    "rule": "error_4xx_rate",
                    "rule_parameters": [{"name": "threshold", "value": "1"}],
                },
            ],
        },
    }

    filepath = "config.json"  # in_tempdir keeps this file out of the repo
    with open(filepath, 'w') as f:
        json.dump(strategy_dict, f)

    result = CliRunner().invoke(
        cli,
        ['deployment', 'update', 'endpoint', path, "-f", filepath],
    )
    assert not result.exception
    print(endpoint.get_update_status())

    # the config-driven update must have produced at least one new build
    updated_build_ids = get_build_ids(endpoint.get_status())
    assert len(updated_build_ids - original_build_ids) > 0
def test_endpoint(self, client, organization, created_entities):
    """An endpoint created with org-custom visibility reports that visibility."""
    expected_visibility = OrgCustom(write=True)
    new_endpoint = client.create_endpoint(
        path=_utils.generate_default_name(),
        workspace=organization.name,
        visibility=expected_visibility,
    )
    created_entities.append(new_endpoint)
    assert_endpoint_visibility(new_endpoint, expected_visibility)