class TestObservations:
    # three random keys, each mapped to a pair of observed values
    # (strings, ints, and floats respectively)
    observations = {
        utils.gen_str(): [utils.gen_str(), utils.gen_str()],
        utils.gen_str(): [utils.gen_int(), utils.gen_int()],
        utils.gen_str(): [utils.gen_float(), utils.gen_float()],
    }

    def test_single(self, experiment_run):
        """Observations logged one at a time can be read back per-key and in bulk."""
        for key, values in six.viewitems(self.observations):
            for value in values:
                experiment_run.log_observation(key, value)

        # a key that was never logged must not resolve
        with pytest.raises(KeyError):
            experiment_run.get_observation(utils.gen_str())

        for key, values in six.viewitems(self.observations):
            retrieved = [
                obs_val
                for obs_val, _ in experiment_run.get_observation(key)
            ]
            assert retrieved == values

        retrieved_all = {
            key: [obs_val for obs_val, _ in obs_seq]
            for key, obs_seq in experiment_run.get_observations().items()
        }
        assert retrieved_all == self.observations

    def test_log_collection(self, experiment_run):
        """Collections are not valid observation values."""
        with pytest.raises(TypeError):  # single fn, list
            experiment_run.log_observation(utils.gen_str(), utils.gen_list())
        with pytest.raises(TypeError):  # single fn, dict
            experiment_run.log_observation(utils.gen_str(), utils.gen_dict())
def test_log_collection(self, experiment_run):
    """Collections are rejected as metric values by both single and batch APIs."""
    with pytest.raises(TypeError):  # single fn, list
        experiment_run.log_metric(utils.gen_str(), utils.gen_list())
    with pytest.raises(TypeError):  # batch fn, list
        experiment_run.log_metrics({utils.gen_str(): utils.gen_list()})
    with pytest.raises(TypeError):  # single fn, dict
        experiment_run.log_metric(utils.gen_str(), utils.gen_dict())
    with pytest.raises(TypeError):  # batch fn, dict
        experiment_run.log_metrics({utils.gen_str(): utils.gen_dict()})
def test_atomic(self, experiment_run):
    """Test if batch completely fails even if only a single key conflicts."""
    experiment_run.log_metrics(self.metrics)

    for existing_key, existing_val in six.viewitems(self.metrics):
        # one conflicting key must abort the whole batch
        with pytest.raises(ValueError):
            experiment_run.log_metrics({
                existing_key: existing_val,
                utils.gen_str(): utils.gen_str(),
            })

    # nothing from the failed batches should have been persisted
    assert experiment_run.get_metrics() == self.metrics
def test_creation_from_scratch(self, client):
    """A freshly created local dataset yields a new PATH-typed version."""
    dataset = client.set_dataset(name=utils.gen_str(), type="local")
    version = dataset.create_version(__file__)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id
def run_fake_experiment(client):
    """Drive a throwaway experiment run through the main logging APIs."""
    run = client.set_experiment_run()

    run.log_attribute("is_test", True)
    run.get_attribute("is_test")

    run.log_hyperparameters({
        'C': utils.gen_float(),
        'solver': utils.gen_str(),
        'max_iter': utils.gen_int(),
    })
    for hyperparam_name in ("C", "solver", "max_iter"):
        run.get_hyperparameter(hyperparam_name)

    # log the same observation key repeatedly, then read the series back
    for _ in range(3):
        run.log_observation("rand_val", utils.gen_float())
    run.get_observation("rand_val")

    run.log_metric("val_acc", utils.gen_float())
    run.get_metric("val_acc")

    run.log_artifact("self", run)
    run.get_artifact("self")
def test_s3_dataset_version_creation(self, client, s3_bucket):
    """Versioning an S3 dataset picks up both objects from the bucket fixture."""
    dataset = client.create_s3_dataset("s3-" + utils.gen_str())
    version = client.create_s3_dataset_version(dataset, s3_bucket)
    assert len(version.dataset_version_info.dataset_part_infos) == 2
def test_big_query_dataset_version_creation(self, client, big_query_job):
    """A BigQuery dataset version records the query of its source job."""
    dataset = client.create_big_query_dataset("bq-" + utils.gen_str())
    # big_query_job fixture: [0] job id, [1] location, [2] query text
    version = client.create_big_query_dataset_version(
        dataset, job_id=big_query_job[0], location=big_query_job[1])
    assert version.dataset_version_info.query == big_query_job[2]
def test_s3_dataset_creation(self, client):
    """An S3-backed dataset is created with the PATH dataset type."""
    try:
        dataset = client.set_dataset("s3-" + utils.gen_str(), type="s3")
        assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    except botocore.exceptions.ClientError:
        pytest.skip("insufficient AWS credentials")
def test_find_datasets_client_api(self, client):
    """find_datasets filters correctly by tags, name, and dataset ids."""
    name1 = utils.gen_str()
    dataset1 = client.set_dataset(
        name=name1,
        type="big query",
        tags=["test1-" + name1, "test2-" + name1])
    assert dataset1.dataset_type == _DatasetService.DatasetTypeEnum.QUERY
    assert dataset1.id

    name2 = utils.gen_str()
    dataset2 = client.set_dataset(name=name2, type="s3", tags=["test1"])
    assert dataset2.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset2.id

    # TODO: update once RAW is supported
    # name = utils.gen_str()
    # dataset3 = client.set_dataset(name=name, type="raw")
    # assert dataset3.dataset_type == _DatasetService.DatasetTypeEnum.RAW
    # assert dataset3.id
    # datasets = client.find_datasets()
    # assert len(datasets) == 3
    # assert datasets[0].id == dataset1.id
    # assert datasets[1].id == dataset2.id
    # assert datasets[2].id == dataset3.id

    found = client.find_datasets()
    # at least 2 because they were just created. Needs to be updated
    assert len(found) >= 2

    found = client.find_datasets(tags=["test1-" + name1, "test2-" + name1])
    assert len(found) == 1
    assert found[0].id == dataset1.id

    found = client.find_datasets(name=name1)
    assert len(found) == 1
    assert found[0].id == dataset1.id

    found = client.find_datasets(dataset_ids=[dataset1.id, dataset2.id])
    assert len(found) == 2

    found = client.find_datasets(
        dataset_ids=[dataset1.id, dataset2.id], name=name1)
    assert len(found) == 1
    assert found[0].id == dataset1.id
def test_store_plt(self, experiment_run):
    """A pyplot state-machine plot round-trips through log_image/get_image."""
    key = utils.gen_str()
    plt.scatter(*np.random.random((2, 10)))
    experiment_run.log_image(key, plt)

    stored = np.asarray(experiment_run.get_image(key).getdata())
    expected = np.asarray(self.matplotlib_to_pil(plt).getdata())
    assert np.array_equal(stored, expected)
def test_creation_from_scratch(self, client):
    """A Dataset can be constructed directly from connection and config."""
    dataset = Dataset(
        client._conn, client._conf,
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset.id
def test_empty(self, experiment_run):
    """Logging an empty byte stream as an artifact is rejected."""
    artifacts = {
        utils.gen_str(): six.BytesIO(),
    }
    for key, stream in six.viewitems(artifacts):
        with pytest.raises(ValueError):
            experiment_run.log_artifact(key, stream)
def test_filesystem_dataset_version_creation(self, client):
    """Versioning a directory captures one part info per contained file."""
    dir_name, _ = self.create_dir_with_files(num_files=3)
    dataset = client.set_dataset("fs-" + utils.gen_str(), type="local")
    version = dataset.create_version(dir_name)
    assert len(version.dataset_version_info.dataset_part_infos) == 3
    # clean up the temporary directory created above
    shutil.rmtree(dir_name)
def test_get_all_datasets_client_api(self, client):
    """Datasets of each supported type can be created with random names."""
    dataset1 = client.create_dataset(
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
    assert dataset1.dataset_type == _DatasetService.DatasetTypeEnum.QUERY
    assert dataset1.id

    dataset2 = client.create_dataset(
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert dataset2.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset2.id

    dataset3 = client.create_dataset(
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.RAW)
    assert dataset3.dataset_type == _DatasetService.DatasetTypeEnum.RAW
    assert dataset3.id
def test_store_fig(self, experiment_run):
    """An explicit Figure object round-trips through log_image/get_image."""
    key = utils.gen_str()
    fig, ax = plt.subplots()
    ax.scatter(*np.random.random((2, 10)))
    experiment_run.log_image(key, fig)

    stored = np.asarray(experiment_run.get_image(key).getdata())
    expected = np.asarray(self.matplotlib_to_pil(fig).getdata())
    assert np.array_equal(stored, expected)
def test_creation_by_id_client_api(self, client):
    """set_dataset(id=...) fetches the same dataset created earlier."""
    dataset = client.set_dataset(name=utils.gen_str(), type="s3")
    assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset.id

    refetched = client.set_dataset(id=dataset.id)
    assert dataset.id == refetched.id
    assert dataset.name == refetched.name
def test_log_dataset_version(self, client, experiment_run, s3_bucket):
    """A logged dataset version is linked back to the run by its id."""
    dataset = client.create_s3_dataset("s3-" + utils.gen_str())
    assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH

    dataset_version = client.create_s3_dataset_version(dataset, s3_bucket)
    experiment_run.log_dataset_version('train', dataset_version)

    _, linked_id = experiment_run.get_dataset('train')
    assert linked_id == dataset_version.id
def test_s3_dataset_version_creation(self, client, s3_bucket):
    """Versioning an S3 dataset yields at least one part info."""
    try:
        dataset = client.set_dataset("s3-" + utils.gen_str(), type="s3")
        version = dataset.create_version(s3_bucket)
        assert len(version.dataset_version_info.dataset_part_infos) >= 1
    except botocore.exceptions.ClientError:
        pytest.skip("insufficient AWS credentials")
def test_creation_by_id(self, client):
    """get_dataset_version(id=...) refetches an existing version."""
    dataset = client.set_dataset(name=utils.gen_str(), type="local")
    version = dataset.create_version(__file__)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id

    refetched = client.get_dataset_version(id=version.id)
    assert version.id == refetched.id
def test_creation_from_scratch(self, client):
    """A PATH dataset version can be created through the client API."""
    dataset = client.create_dataset(
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    version = client.create_dataset_version(
        dataset=dataset,
        dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id
def test_creation_by_id(self, client):
    """Constructing a Dataset by id resolves to the same entity."""
    dataset = Dataset(
        client._conn, client._conf,
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset.id

    refetched = Dataset(client._conn, client._conf, _dataset_id=dataset.id)
    assert dataset.id == refetched.id
def test_get_dataset_client_api(self, client):
    """get_dataset(id=...) returns the previously created dataset."""
    dataset = client.create_dataset(
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert dataset.id

    refetched = client.get_dataset(id=dataset.id)
    assert dataset.id == refetched.id
    assert dataset.name == refetched.name
def test_creation_from_scratch(self, client):
    """A DatasetVersion can be built directly from a Dataset's id."""
    dataset = Dataset(
        client._conn, client._conf,
        name=utils.gen_str(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    version = DatasetVersion(
        client._conn, client._conf,
        dataset_id=dataset.id,
        dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id
def test_batch(self, experiment_run):
    """Batch-logged hyperparameters are readable individually and in bulk."""
    experiment_run.log_hyperparameters(self.hyperparameters)

    # a key that was never logged must not resolve
    with pytest.raises(KeyError):
        experiment_run.get_hyperparameter(utils.gen_str())

    for key, val in six.viewitems(self.hyperparameters):
        assert experiment_run.get_hyperparameter(key) == val

    assert experiment_run.get_hyperparameters() == self.hyperparameters
def test_get(self, experiment_run):
    """Dict artifacts round-trip through log_artifact/get_artifact."""
    # three random keys, each mapped to a random six-entry dict
    artifacts = {
        utils.gen_str(): {utils.gen_str(): utils.gen_str() for _ in range(6)}
        for _ in range(3)
    }

    for key, artifact in six.viewitems(artifacts):
        experiment_run.log_artifact(key, artifact)
    for key, artifact in six.viewitems(artifacts):
        assert experiment_run.get_artifact(key) == artifact

    # a key that was never logged must not resolve
    with pytest.raises(KeyError):
        experiment_run.get_artifact(utils.gen_str())
def test_batch(self, experiment_run):
    """Batch-logged metrics are readable individually and in bulk."""
    experiment_run.log_metrics(self.metrics)

    # a key that was never logged must not resolve
    with pytest.raises(KeyError):
        experiment_run.get_metric(utils.gen_str())

    for key, val in six.viewitems(self.metrics):
        assert experiment_run.get_metric(key) == val

    assert experiment_run.get_metrics() == self.metrics
def test_rdbms_version_creation(self, client):
    """A postgres dataset version stores query, connection URI, and row count."""
    dataset = client.set_dataset("pg-" + utils.gen_str(), type="postgres")
    version = dataset.create_version(
        query="SELECT * FROM ner-table",
        db_connection_str="localhost:6543",
        num_records=100)

    info = version.dataset_version_info
    assert info.query == "SELECT * FROM ner-table"
    assert info.data_source_uri == "localhost:6543"
    assert info.num_records == 100
def test_single(self, experiment_run):
    """Attributes logged one at a time can be read back per-key and in bulk."""
    for key, val in six.viewitems(self.attributes):
        experiment_run.log_attribute(key, val)

    # a key that was never logged must not resolve
    with pytest.raises(KeyError):
        experiment_run.get_attribute(utils.gen_str())

    for key, val in six.viewitems(self.attributes):
        assert experiment_run.get_attribute(key) == val

    assert experiment_run.get_attributes() == self.attributes
def test_log_collection(self, experiment_run):
    """Lists and dicts are accepted as attribute values by both APIs."""
    # exercise each collection type through the single and batch entry points
    for generate in (utils.gen_list, utils.gen_dict):
        # single fn
        key, value = utils.gen_str(), generate()
        experiment_run.log_attribute(key, value)
        assert experiment_run.get_attribute(key) == value

        # batch fn
        key, value = utils.gen_str(), generate()
        experiment_run.log_attributes({key: value})
        assert experiment_run.get_attribute(key) == value
def test_store_pil(self, experiment_run):
    """A hand-drawn PIL image round-trips through log_image/get_image."""
    key = utils.gen_str()

    # draw a random arc on a white 64x64 canvas
    img = PIL.Image.new('RGB', (64, 64), 'white')
    bounding_box = np.r_[
        np.random.randint(32, size=(2)),
        np.random.randint(32, 64, size=(2))
    ].tolist()
    PIL.ImageDraw.Draw(img).arc(
        bounding_box,
        np.random.randint(360), np.random.randint(360),
        'black')

    experiment_run.log_image(key, img)

    stored = np.asarray(experiment_run.get_image(key).getdata())
    assert np.array_equal(stored, np.asarray(img.getdata()))