def test_create_project_throws_error_if_duplicate(asyn_repo_w_mock_filesystem):
    """`create_project` raises `RubiconException` when the project already exists.

    The mocked filesystem's `_exists` is set to return True before
    `create_project` is awaited, and the raised message must mention the
    project name.
    """
    asyn_repo_w_mock_filesystem.filesystem._exists.return_value = True

    project = domain.Project(f"Test Project {uuid.uuid4()}")

    with pytest.raises(RubiconException) as e:
        asyncio.run(asyn_repo_w_mock_filesystem.create_project(domain.Project(project.name)))

    # Check the raised exception itself; the string form of the
    # `ExceptionInfo` wrapper is not guaranteed to be the bare message.
    assert f"'{project.name}' already exists" in str(e.value)
def test_get_dataframes_metadata_with_no_results(asyn_repo_w_mock_filesystem):
    """`get_dataframes_metadata` returns `[]` when the filesystem `_ls` raises `FileNotFoundError`."""
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._ls.side_effect = FileNotFoundError()

    project_name = domain.Project(f"Test Project {uuid.uuid4()}").name
    found = asyncio.run(repo.get_dataframes_metadata(project_name))

    assert found == []
def _create_experiment_domain(project=None, tags=None):
    """Build a `domain.Experiment` with a unique name for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        The project whose name the experiment is attached to. A new
        uniquely-named project is created when omitted.
    tags : iterable of str, optional
        Tags to set on the experiment. Defaults to no tags.

    Returns
    -------
    domain.Experiment
        The newly created experiment domain object.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    return domain.Experiment(
        name=f"Test Experiment {uuid.uuid4()}",
        project_name=project.name,
        # Copy so the experiment never shares a list object with the caller.
        tags=list(tags) if tags is not None else [],
    )
def test_get_experiments_with_no_results(asyn_repo_w_mock_filesystem):
    """`get_experiments` returns `[]` when the filesystem `_ls` raises `FileNotFoundError`."""
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._ls.side_effect = FileNotFoundError()

    project_name = domain.Project(f"Test Project {uuid.uuid4()}").name
    found = asyncio.run(repo.get_experiments(project_name))

    assert found == []
def test_create_project_throws_error_if_duplicate(memory_repository):
    """`create_project` raises `RubiconException` for a name that was already created.

    A project is first created through the `_create_project` helper; a
    second `create_project` call with the same name must fail with a
    message that mentions that name.
    """
    repository = memory_repository
    project = _create_project(repository)

    with pytest.raises(RubiconException) as e:
        repository.create_project(domain.Project(project.name))

    # Check the raised exception itself; the string form of the
    # `ExceptionInfo` wrapper is not guaranteed to be the bare message.
    assert f"'{project.name}' already exists" in str(e.value)
def test_get_project(asyn_repo_w_mock_filesystem):
    """`get_project` rebuilds a project from the serialized metadata returned by `_cat_file`."""
    repo = asyn_repo_w_mock_filesystem
    expected_project = domain.Project(f"Test Project {uuid.uuid4()}")
    repo.filesystem._cat_file.return_value = json.dumps(expected_project)

    loaded_project = asyncio.run(repo.get_project(expected_project.name))

    assert loaded_project.id == expected_project.id
    assert loaded_project.name == expected_project.name
def test_persist_domain(mock_open):
    """`S3Repository._persist_domain` opens the metadata path exactly once in write mode."""
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"s3://bucket/root/{slugify(project.name)}/metadata.json"

    repository = S3Repository(root_dir="s3://bucket/root")
    repository._persist_domain(project, metadata_path)

    mock_open.assert_called_once_with(metadata_path, "w")
def test_sync_from_local(mock_get_project, mock_run):
    """`Rubicon.sync` invokes `aws s3 sync` from the local project directory to the target.

    `mock_get_project` is set to return a client project wrapping a
    domain project named "Sync Test Project"; the recorded calls on
    `mock_run` must contain the expected command line.
    """
    rubicon = Rubicon(persistence="filesystem", root_dir="./local/path")
    project_name = "Sync Test Project"
    mock_get_project.return_value = client.Project(domain.Project(project_name))

    rubicon.sync(project_name, "s3://test/path")

    # Use the public `call_args_list` rather than the mock's private storage.
    expected_command = "aws s3 sync ./local/path/sync-test-project s3://test/path"
    assert expected_command in str(mock_run.call_args_list)
def test_delete_artifact_throws_error_if_not_found(asyn_repo_w_mock_filesystem):
    """`delete_artifact` raises `RubiconException` when the filesystem `rm` fails.

    The mocked filesystem's `rm` is set to raise `FileNotFoundError`; the
    resulting exception message must name the missing artifact id.
    """
    asyn_repo_w_mock_filesystem.filesystem.rm.side_effect = FileNotFoundError()

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    missing_artifact_id = uuid.uuid4()

    with pytest.raises(RubiconException) as e:
        asyncio.run(asyn_repo_w_mock_filesystem.delete_artifact(project.name, missing_artifact_id))

    # Check the raised exception itself; the string form of the
    # `ExceptionInfo` wrapper is not guaranteed to be the bare message.
    assert f"No artifact with id `{missing_artifact_id}`" in str(e.value)
def test_persist_domain(mock_mkdirs, mock_open):
    """`LocalRepository._persist_domain` creates the parent directory, then opens the file.

    Both mocks must have been called exactly once: `mock_mkdirs` with the
    metadata file's directory and `exist_ok=True`, and `mock_open` with the
    metadata path in write mode.
    """
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"/local/root/{slugify(project.name)}/metadata.json"

    repository = LocalRepository(root_dir="/local/root")
    repository._persist_domain(project, metadata_path)

    parent_dir = os.path.dirname(metadata_path)
    mock_mkdirs.assert_called_once_with(parent_dir, exist_ok=True)
    mock_open.assert_called_once_with(metadata_path, "w")
def test_persist_domain(asyn_s3_repo_w_mock_filesystem):
    """The async S3 repo persists a domain object with a single `_pipe_file` call.

    The only call recorded on the mocked filesystem must be `_pipe_file`
    with the metadata path and the serialized project.
    """
    repo = asyn_s3_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"s3://bucket/root/{slugify(project.name)}/metadata.json"

    asyncio.run(repo._persist_domain(project, metadata_path))

    expected_calls = [call._pipe_file(metadata_path, json.dumps(project))]
    assert repo.filesystem.mock_calls == expected_calls
def test_get_dataframe_metadata_throws_error_if_not_found(asyn_repo_w_mock_filesystem):
    """`get_dataframe_metadata` raises `RubiconException` when `_cat_file` fails.

    The mocked filesystem's `_cat_file` is set to raise
    `FileNotFoundError`; the resulting exception message must name the
    missing dataframe id.
    """
    asyn_repo_w_mock_filesystem.filesystem._cat_file.side_effect = FileNotFoundError()

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    missing_dataframe_id = uuid.uuid4()

    with pytest.raises(RubiconException) as e:
        asyncio.run(
            asyn_repo_w_mock_filesystem.get_dataframe_metadata(project.name, missing_dataframe_id)
        )

    # Check the raised exception itself; the string form of the
    # `ExceptionInfo` wrapper is not guaranteed to be the bare message.
    assert f"No dataframe with id `{missing_dataframe_id}`" in str(e.value)
def test_persist_domain_throws_error(mock_mkdirs, mock_open):
    """`LocalRepository._persist_domain` raises `TypeError` before touching the filesystem.

    The project's description is the `str` type object itself. After the
    `TypeError`, neither `mock_mkdirs` nor `mock_open` may have been called.
    """
    unserializable_description = str
    project = domain.Project(
        f"Test Project {uuid.uuid4()}",
        description=unserializable_description,
    )
    metadata_path = f"/local/root/{slugify(project.name)}/metadata.json"
    repository = LocalRepository(root_dir="/local/root")

    with pytest.raises(TypeError):
        repository._persist_domain(project, metadata_path)

    mock_mkdirs.assert_not_called()
    mock_open.assert_not_called()
def test_sync_from_local_error(mock_get_project, mock_run):
    """`Rubicon.sync` surfaces a failed subprocess as a `RubiconException`.

    `mock_run` is set to raise `subprocess.CalledProcessError` with a
    known `stderr`; that text must appear in the raised exception's message.
    """
    rubicon = Rubicon(persistence="filesystem", root_dir="./local/path")
    project_name = "Sync Test Project"
    mock_get_project.return_value = client.Project(domain.Project(project_name))
    mock_run.side_effect = subprocess.CalledProcessError(
        cmd="aws cli sync",
        stderr="Some error. I bet it was proxy tho.",
        returncode=1,
    )

    with pytest.raises(RubiconException) as e:
        rubicon.sync(project_name, "s3://test/path")

    # Check the raised exception itself; the string form of the
    # `ExceptionInfo` wrapper is not guaranteed to be the bare message.
    assert "Some error. I bet it was proxy tho." in str(e.value)
def test_get_or_create_project_get(asyn_client_w_mock_repo):
    """`get_or_create_project` returns the existing project with only a `get_project` call.

    The mocked repository's `get_project` returns a domain project, and
    the repository's recorded calls must be exactly that one lookup.
    """
    rubicon = asyn_client_w_mock_repo
    requested_name = f"Test Project {uuid.uuid4()}"
    rubicon.repository.get_project.return_value = domain.Project(name=requested_name)

    fetched = asyncio.run(rubicon.get_or_create_project(requested_name))

    expected_calls = [call.get_project(fetched.name)]
    assert fetched.name == requested_name
    assert rubicon.repository.mock_calls == expected_calls
def test_create_project(asyn_repo_w_mock_filesystem):
    """`create_project` checks for an existing project, invalidates the cache and persists.

    With `_exists` returning False, the mocked filesystem must record
    `_exists(<metadata path>)` followed by `invalidate_cache()`, and
    `_persist_domain` must record one call with the project and its
    metadata path.
    """
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._exists.return_value = False

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    project_slug = slugify(project.name)
    metadata_path = f"{repo.root_dir}/{project_slug}/metadata.json"

    asyncio.run(repo.create_project(project))

    expected_filesystem_calls = [call._exists(metadata_path), call.invalidate_cache()]
    expected_persist_calls = [call._persist_domain(project, metadata_path)]
    assert repo.filesystem.mock_calls == expected_filesystem_calls
    assert repo._persist_domain.mock_calls == expected_persist_calls
def _create_project_domain(self, name, description, github_url, training_metadata):
    """Build and return a `domain.Project` from the given attributes.

    When `self.config.is_auto_git_enabled` is truthy and no `github_url`
    was supplied, the URL is taken from `self._get_github_url()`. A
    non-None `training_metadata` is wrapped in
    `domain.utils.TrainingMetadata` before being stored on the project.
    """
    if self.config.is_auto_git_enabled and github_url is None:
        github_url = self._get_github_url()

    wrapped_metadata = training_metadata
    if wrapped_metadata is not None:
        wrapped_metadata = domain.utils.TrainingMetadata(wrapped_metadata)

    project_attributes = {
        "description": description,
        "github_url": github_url,
        "training_metadata": wrapped_metadata,
    }
    return domain.Project(name, **project_attributes)
def test_properties():
    """A client `Project` exposes the attributes of the domain project it wraps.

    `training_metadata` on the client object is compared against the first
    entry of the domain object's `training_metadata.training_metadata`.
    """
    training_metadata = domain.utils.TrainingMetadata([("test/path", "SELECT * FROM test")])
    domain_project = domain.Project(
        "Test Project",
        description="a test project",
        github_url="github.com",
        training_metadata=training_metadata,
    )

    project = Project(domain_project)

    assert project.name == "Test Project"
    assert project.description == "a test project"
    assert project.github_url == "github.com"
    assert project.training_metadata == domain_project.training_metadata.training_metadata[0]
    assert project.created_at == domain_project.created_at
    assert project.id == domain_project.id
def get_projects(self):
    """Get the list of projects from the filesystem.

    Returns
    -------
    list of rubicon.domain.Project
        The list of projects from the filesystem. Empty when listing or
        reading raises `FileNotFoundError`.
    """
    try:
        metadata_paths = self._ls_directories_only(self.root_dir)
        raw_by_path = self.filesystem.cat(metadata_paths)

        projects = []
        for raw_metadata in raw_by_path.values():
            projects.append(domain.Project(**json.loads(raw_metadata)))
    except FileNotFoundError:
        return []

    return projects
def get_project(self, project_name):
    """Retrieve a project from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve.

    Returns
    -------
    rubicon.domain.Project
        The project with name `project_name`.

    Raises
    ------
    RubiconException
        If reading the project's metadata file raises `FileNotFoundError`.
    """
    project_metadata_path = self._get_project_metadata_path(project_name)

    # Only the filesystem read is guarded; parsing happens afterwards.
    try:
        raw_metadata = self.filesystem.cat(project_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the original filesystem error is kept as the cause.
        raise RubiconException(f"No project with name '{project_name}' found.") from err

    return domain.Project(**json.loads(raw_metadata))
def test_get_projects(asyn_client_w_mock_repo):
    """`projects()` returns one client project per domain project from the repository.

    Every id returned by the mocked `get_projects` must appear exactly
    once in the result, and the repository must record only that call.
    """
    rubicon = asyn_client_w_mock_repo
    project_domains = [domain.Project(name=f"Test Project {uuid.uuid4()}") for _ in range(3)]
    rubicon.repository.get_projects.return_value = project_domains

    projects = asyncio.run(rubicon.projects())

    expected_calls = [call.get_projects()]

    # Tick each expected id off the returned ids; none may be left over.
    unmatched_ids = [project.id for project in projects]
    expected_ids = [project_domain.id for project_domain in project_domains]
    for expected_id in expected_ids:
        assert expected_id in unmatched_ids
        unmatched_ids.remove(expected_id)

    assert len(unmatched_ids) == 0
    assert rubicon.repository.mock_calls == expected_calls
async def get_projects(self):
    """Overrides `rubicon.repository.BaseRepository.get_projects` to
    asynchronously get the list of projects from the filesystem.

    Returns
    -------
    list of rubicon.domain.Project
        The list of projects from the filesystem. Empty when listing or
        reading raises `FileNotFoundError`.
    """
    try:
        metadata_paths = await self._ls_directories_only(self.root_dir)

        # One `_cat_file` awaitable per path, gathered together.
        pending_reads = [self.filesystem._cat_file(path) for path in metadata_paths]
        raw_metadata = await asyncio.gather(*pending_reads)

        projects = [domain.Project(**json.loads(data)) for data in raw_metadata]
    except FileNotFoundError:
        return []

    return projects
def test_get_projects(asyn_repo_w_mock_filesystem):
    """`get_projects` returns one project per directory listed by the mocked filesystem.

    `_ls` reports three project directories and `_cat_file` yields the
    serialized projects in turn; each returned id must match a distinct
    written project.
    """
    repo = asyn_repo_w_mock_filesystem
    written_projects = [domain.Project(f"Test Project {uuid.uuid4()}") for _ in range(3)]

    directory_listing = []
    for written in written_projects:
        project_dir = f"{repo.root_dir}/{slugify(written.name)}"
        directory_listing.append({"name": project_dir, "StorageClass": "DIRECTORY"})

    repo.filesystem._ls.return_value = directory_listing
    repo.filesystem._cat_file.side_effect = [json.dumps(p) for p in written_projects]

    projects = asyncio.run(repo.get_projects())

    assert len(projects) == 3

    # Remove each id once matched so duplicates in the result are caught.
    remaining_ids = [written.id for written in written_projects]
    for project in projects:
        assert project.id in remaining_ids
        remaining_ids.remove(project.id)
async def get_project(self, project_name):
    """Overrides `rubicon.repository.BaseRepository.get_project` to
    asynchronously retrieve a project from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve.

    Returns
    -------
    rubicon.domain.Project
        The project with name `project_name`.

    Raises
    ------
    RubiconException
        If reading the project's metadata file raises `FileNotFoundError`.
    """
    project_metadata_path = self._get_project_metadata_path(project_name)

    # Only the filesystem read is guarded; parsing happens afterwards.
    try:
        raw_metadata = await self.filesystem._cat_file(project_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the original filesystem error is kept as the cause.
        raise RubiconException(f"No project with name '{project_name}' found.") from err

    return domain.Project(**json.loads(raw_metadata))
def test_get_experiments(asyn_repo_w_mock_filesystem):
    """`get_experiments` returns one experiment per directory listed by the mocked filesystem.

    `_ls` reports three experiment directories and `_cat_file` yields the
    serialized experiments in turn; each returned id must match a distinct
    written experiment.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    written_experiments = [_create_experiment_domain(project=project) for _ in range(3)]

    directory_listing = []
    for written in written_experiments:
        experiment_dir = (
            f"{repo.root_dir}/{slugify(written.project_name)}/experiments/{written.id}"
        )
        directory_listing.append({"name": experiment_dir, "StorageClass": "DIRECTORY"})

    repo.filesystem._ls.return_value = directory_listing
    repo.filesystem._cat_file.side_effect = [json.dumps(e) for e in written_experiments]

    experiments = asyncio.run(repo.get_experiments(project.name))

    assert len(experiments) == 3

    # Remove each id once matched so duplicates in the result are caught.
    remaining_ids = [written.id for written in written_experiments]
    for experiment in experiments:
        assert experiment.id in remaining_ids
        remaining_ids.remove(experiment.id)
def test_get_projects_as_dask_df(asyn_client_w_mock_repo):
    """`get_project_as_dask_df` yields a dask DataFrame with one row per experiment.

    The mocked repository returns one project, two experiments, a single
    empty tag record and no parameters or metrics.
    """
    rubicon = asyn_client_w_mock_repo
    project_name = f"Test Project {uuid.uuid4()}"

    project_domain = domain.Project(name=project_name)
    experiment_domains = [
        domain.Experiment(project_name=project_name, name=f"Test Experiment {uuid.uuid4()}")
        for _ in range(2)
    ]
    empty_tag_record = {"added_tags": [], "removed_tags": []}

    rubicon.repository.get_project.return_value = project_domain
    rubicon.repository.get_experiments.return_value = experiment_domains
    rubicon.repository.get_tags.return_value = [empty_tag_record]
    rubicon.repository.get_parameters.return_value = []
    rubicon.repository.get_metrics.return_value = []

    ddf = asyncio.run(rubicon.get_project_as_dask_df(project_name))

    assert isinstance(ddf, dd.core.DataFrame)
    assert len(ddf.compute()) == 2
def test_get_artifacts_metadata(asyn_repo_w_mock_filesystem):
    """`get_artifacts_metadata` returns one artifact per directory listed by the mocked filesystem.

    `_ls` reports three artifact directories and `_cat_file` yields the
    serialized artifacts in turn; each returned id must match a distinct
    written artifact.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    # The helper returns a (project, artifact) pair; only the artifact is kept.
    written_artifacts = [_create_artifact_domain(project=project)[1] for _ in range(3)]

    directory_listing = []
    for written in written_artifacts:
        artifact_dir = f"{repo.root_dir}/{slugify(project.name)}/artifacts/{written.id}"
        directory_listing.append({"name": artifact_dir, "StorageClass": "DIRECTORY"})

    repo.filesystem._ls.return_value = directory_listing
    repo.filesystem._cat_file.side_effect = [json.dumps(a) for a in written_artifacts]

    artifacts = asyncio.run(repo.get_artifacts_metadata(project.name))

    assert len(artifacts) == 3

    # Remove each id once matched so duplicates in the result are caught.
    remaining_ids = [written.id for written in written_artifacts]
    for artifact in artifacts:
        assert artifact.id in remaining_ids
        remaining_ids.remove(artifact.id)
def test_get_dataframes_metadata(asyn_repo_w_mock_filesystem):
    """`get_dataframes_metadata` returns one dataframe per directory listed by the mocked filesystem.

    `_ls` reports three dataframe directories and `_cat_file` yields the
    serialized dataframes in turn; each returned id must match a distinct
    written dataframe.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    # The helper returns a (project, dataframe) pair; only the dataframe is kept.
    written_dataframes = [_create_dataframe_domain(project=project)[1] for _ in range(3)]

    directory_listing = []
    for written in written_dataframes:
        dataframe_dir = f"{repo.root_dir}/{slugify(project.name)}/dataframes/{written.id}"
        directory_listing.append({"name": dataframe_dir, "StorageClass": "DIRECTORY"})

    repo.filesystem._ls.return_value = directory_listing
    repo.filesystem._cat_file.side_effect = [json.dumps(d) for d in written_dataframes]

    dataframes = asyncio.run(repo.get_dataframes_metadata(project.name))

    assert len(dataframes) == 3

    # Remove each id once matched so duplicates in the result are caught.
    remaining_ids = [written.id for written in written_dataframes]
    for dataframe in dataframes:
        assert dataframe.id in remaining_ids
        remaining_ids.remove(dataframe.id)
def _create_artifact_domain(project=None, tags=None):
    """Build a `(project, artifact)` pair of domain objects for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        The project that becomes the artifact's parent. A new
        uniquely-named project is created when omitted.
    tags : optional
        Accepted for signature compatibility; currently not used.

    Returns
    -------
    tuple
        The project and a `domain.Artifact` with a unique name whose
        `parent_id` is the project's id.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    artifact = domain.Artifact(name=f"Test Artifact {uuid.uuid4()}", parent_id=project.id)
    return project, artifact
def _create_dataframe_domain(project=None, tags=None):
    """Build a `(project, dataframe)` pair of domain objects for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        The project that becomes the dataframe's parent. A new
        uniquely-named project is created when omitted.
    tags : optional
        Accepted for signature compatibility; currently not used.

    Returns
    -------
    tuple
        The project and a `domain.Dataframe` whose `parent_id` is the
        project's id.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    dataframe = domain.Dataframe(parent_id=project.id)
    return project, dataframe