def get_tags(self, project_name, experiment_id=None, dataframe_id=None):
    """Read the tag history of an object from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the tagged object belongs to.
    experiment_id : str, optional
        The ID of the experiment to retrieve tags from.
    dataframe_id : str, optional
        The ID of the dataframe to retrieve tags from.

    Returns
    -------
    list of dict
        A list of dictionaries with one key each, `added_tags` or
        `removed_tags`, where the value is a list of tag names that
        have been added to or removed from the specified object. The
        list is empty when no `tags_*.json` file matches.
    """
    root = self._get_tag_metadata_root(project_name, experiment_id, dataframe_id)
    found = self.filesystem.glob(f"{root}/tags_*.json", detail=True)

    if not found:
        return []

    # `_sort_tag_paths` yields pairs; only the second item (the path) is used.
    ordered_paths = [path for _, path in self._sort_tag_paths(found)]
    contents = self.filesystem.cat(ordered_paths)

    return [json.loads(contents[path]) for path in ordered_paths]
def get_parameters(self, project_name, experiment_id):
    """Load every parameter logged to the experiment with ID `experiment_id`.

    Parameters
    ----------
    project_name : str
        The name of the project the experiment with ID
        `experiment_id` is logged to.
    experiment_id : str
        The ID of the experiment to retrieve all parameters from.

    Returns
    -------
    list of rubicon.domain.Parameter
        The parameters logged to the experiment with ID
        `experiment_id`, or an empty list if listing or reading
        raises `FileNotFoundError`.
    """
    root = self._get_parameter_metadata_root(project_name, experiment_id)

    try:
        paths = self._ls_directories_only(root)
        raw_by_path = self.filesystem.cat(paths)
        found = [domain.Parameter(**json.loads(raw)) for raw in raw_by_path.values()]
    except FileNotFoundError:
        found = []

    return found
async def get_parameter(self, project_name, experiment_id, parameter_name):
    """Overrides `rubicon.repository.BaseRepository.get_parameter` to
    asynchronously retrieve a parameter from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project this parameter belongs to.
    experiment_id : str
        The ID of the experiment the parameter with name
        `parameter_name` is logged to.
    parameter_name : str
        The name of the parameter to retrieve.

    Returns
    -------
    rubicon.domain.Parameter
        The parameter with name `parameter_name`.

    Raises
    ------
    RubiconException
        If reading the parameter's metadata file raises
        `FileNotFoundError`.
    """
    parameter_metadata_path = self._get_parameter_metadata_path(
        project_name, experiment_id, parameter_name
    )

    try:
        raw_parameter = await self.filesystem._cat_file(parameter_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(
            f"No parameter with name '{parameter_name}' found."
        ) from err

    return domain.Parameter(**json.loads(raw_parameter))
async def get_experiments(self, project_name):
    """Overrides `rubicon.repository.BaseRepository.get_experiments` to
    asynchronously load every experiment logged to a project.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve all experiments from.

    Returns
    -------
    list of rubicon.domain.Experiment
        The experiments logged to the project with name
        `project_name`, or an empty list if listing or reading raises
        `FileNotFoundError`.
    """
    root = self._get_experiment_metadata_root(project_name)

    try:
        paths = await self._ls_directories_only(root)
        # Read all metadata files concurrently.
        reads = [self.filesystem._cat_file(path) for path in paths]
        payloads = await asyncio.gather(*reads)
        return [domain.Experiment(**json.loads(payload)) for payload in payloads]
    except FileNotFoundError:
        return []
def get_experiments(self, project_name):
    """Load every experiment logged to the project named `project_name`.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve all experiments from.

    Returns
    -------
    list of rubicon.domain.Experiment
        The experiments logged to the project with name
        `project_name`, or an empty list if listing or reading raises
        `FileNotFoundError`.
    """
    root = self._get_experiment_metadata_root(project_name)

    try:
        raw_by_path = self.filesystem.cat(self._ls_directories_only(root))
        return [domain.Experiment(**json.loads(raw)) for raw in raw_by_path.values()]
    except FileNotFoundError:
        return []
async def get_metric(self, project_name, experiment_id, metric_name):
    """Overrides `rubicon.repository.BaseRepository.get_metric` to
    asynchronously retrieve a metric from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project this metric belongs to.
    experiment_id : str
        The ID of the experiment the metric with name `metric_name`
        is logged to.
    metric_name : str
        The name of the metric to retrieve.

    Returns
    -------
    rubicon.domain.Metric
        The metric with name `metric_name`.

    Raises
    ------
    RubiconException
        If reading the metric's metadata file raises
        `FileNotFoundError`.
    """
    metric_metadata_path = self._get_metric_metadata_path(
        project_name, experiment_id, metric_name
    )

    try:
        raw_metric = await self.filesystem._cat_file(metric_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No metric with name '{metric_name}' found.") from err

    return domain.Metric(**json.loads(raw_metric))
async def get_artifact_metadata(self, project_name, artifact_id, experiment_id=None):
    """Overrides `rubicon.repository.BaseRepository.get_artifact_metadata`
    to asynchronously retrieve an artifact's metadata from the
    configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the artifact with ID `artifact_id`
        is logged to.
    artifact_id : str
        The ID of the artifact to retrieve.
    experiment_id : str, optional
        The ID of the experiment the artifact with ID `artifact_id`
        is logged to. Artifacts do not need to belong to an
        experiment.

    Returns
    -------
    rubicon.domain.Artifact
        The artifact with ID `artifact_id`.

    Raises
    ------
    RubiconException
        If reading the artifact's metadata file raises
        `FileNotFoundError`.
    """
    # Note the helper takes `experiment_id` before `artifact_id`.
    artifact_metadata_path = self._get_artifact_metadata_path(
        project_name, experiment_id, artifact_id
    )

    try:
        raw_artifact = await self.filesystem._cat_file(artifact_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No artifact with id `{artifact_id}` found.") from err

    return domain.Artifact(**json.loads(raw_artifact))
async def get_artifacts_metadata(self, project_name, experiment_id=None):
    """Overrides `rubicon.repository.BaseRepository.get_artifacts_metadata`
    to asynchronously load the metadata of every artifact logged to the
    specified object.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve all artifacts from.
    experiment_id : str, optional
        The ID of the experiment to retrieve all artifacts from.
        Artifacts do not need to belong to an experiment.

    Returns
    -------
    list of rubicon.domain.Artifact
        The artifacts logged to the specified object, or an empty
        list if listing or reading raises `FileNotFoundError`.
    """
    root = self._get_artifact_metadata_root(project_name, experiment_id)

    try:
        paths = await self._ls_directories_only(root)
        # Read all metadata files concurrently.
        payloads = await asyncio.gather(*(self.filesystem._cat_file(p) for p in paths))
        loaded = [domain.Artifact(**json.loads(payload)) for payload in payloads]
    except FileNotFoundError:
        loaded = []

    return loaded
async def get_dataframe_metadata(self, project_name, dataframe_id, experiment_id=None):
    """Overrides `rubicon.repository.BaseRepository.get_dataframe_metadata`
    to asynchronously retrieve a dataframe's metadata from the
    configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the dataframe with ID `dataframe_id`
        is logged to.
    dataframe_id : str
        The ID of the dataframe to retrieve.
    experiment_id : str, optional
        The ID of the experiment the dataframe with ID `dataframe_id`
        is logged to. Dataframes do not need to belong to an
        experiment.

    Returns
    -------
    rubicon.domain.Dataframe
        The dataframe with ID `dataframe_id`.

    Raises
    ------
    RubiconException
        If reading the dataframe's metadata file raises
        `FileNotFoundError`.
    """
    # Note the helper takes `experiment_id` before `dataframe_id`.
    dataframe_metadata_path = self._get_dataframe_metadata_path(
        project_name, experiment_id, dataframe_id
    )

    try:
        raw_dataframe = await self.filesystem._cat_file(dataframe_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No dataframe with id `{dataframe_id}` found.") from err

    return domain.Dataframe(**json.loads(raw_dataframe))
async def get_metrics(self, project_name, experiment_id):
    """Overrides `rubicon.repository.BaseRepository.get_metrics` to
    asynchronously load every metric logged to the experiment with ID
    `experiment_id`.

    Parameters
    ----------
    project_name : str
        The name of the project the experiment with ID
        `experiment_id` is logged to.
    experiment_id : str
        The ID of the experiment to retrieve all metrics from.

    Returns
    -------
    list of rubicon.domain.Metric
        The metrics logged to the experiment with ID `experiment_id`,
        or an empty list if listing or reading raises
        `FileNotFoundError`.
    """
    root = self._get_metric_metadata_root(project_name, experiment_id)

    try:
        paths = await self._ls_directories_only(root)
        pending_reads = [self.filesystem._cat_file(path) for path in paths]
        # Read all metadata files concurrently.
        return [
            domain.Metric(**json.loads(payload))
            for payload in await asyncio.gather(*pending_reads)
        ]
    except FileNotFoundError:
        return []
def get_dataframes_metadata(self, project_name, experiment_id=None):
    """Load the metadata of every dataframe logged to the specified object.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve all dataframes from.
    experiment_id : str, optional
        The ID of the experiment to retrieve all dataframes from.
        Dataframes do not need to belong to an experiment.

    Returns
    -------
    list of rubicon.domain.Dataframe
        The dataframes logged to the specified object, or an empty
        list if listing or reading raises `FileNotFoundError`.
    """
    root = self._get_dataframe_metadata_root(project_name, experiment_id)

    try:
        paths = self._ls_directories_only(root)
        payloads = self.filesystem.cat(paths).values()
        loaded = [domain.Dataframe(**json.loads(payload)) for payload in payloads]
    except FileNotFoundError:
        loaded = []

    return loaded
async def get_feature(self, project_name, experiment_id, feature_name):
    """Overrides `rubicon.repository.BaseRepository.get_feature` to
    asynchronously retrieve a feature from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the experiment with ID
        `experiment_id` is logged to.
    experiment_id : str
        The ID of the experiment the feature with name
        `feature_name` is logged to.
    feature_name : str
        The name of the feature to retrieve.

    Returns
    -------
    rubicon.domain.Feature
        The feature with name `feature_name`.

    Raises
    ------
    RubiconException
        If reading the feature's metadata file raises
        `FileNotFoundError`.
    """
    feature_metadata_path = self._get_feature_metadata_path(
        project_name, experiment_id, feature_name
    )

    try:
        raw_feature = await self.filesystem._cat_file(feature_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No feature with name '{feature_name}' found.") from err

    return domain.Feature(**json.loads(raw_feature))
async def get_experiment(self, project_name, experiment_id):
    """Overrides `rubicon.repository.BaseRepository.get_experiment` to
    asynchronously retrieve an experiment from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the experiment with ID
        `experiment_id` is logged to.
    experiment_id : str
        The ID of the experiment to retrieve.

    Returns
    -------
    rubicon.domain.Experiment
        The experiment with ID `experiment_id`.

    Raises
    ------
    RubiconException
        If reading the experiment's metadata file raises
        `FileNotFoundError`.
    """
    experiment_metadata_path = self._get_experiment_metadata_path(
        project_name, experiment_id
    )

    try:
        raw_experiment = await self.filesystem._cat_file(experiment_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No experiment with id `{experiment_id}` found.") from err

    return domain.Experiment(**json.loads(raw_experiment))
def test_can_deserialize_training_metadata():
    """A payload tagged `training_metadata` decodes to a `TrainingMetadata`
    whose pairs are tuples."""
    expected_pairs = [
        ("test/path", "SELECT * FROM test"),
        ("test/other/path", "SELECT * FROM test"),
    ]
    payload = (
        '{"training_metadata": {"_type": "training_metadata", "value": '
        '[["test/path", "SELECT * FROM test"], ["test/other/path", "SELECT * FROM test"]]}}'
    )

    decoded = json.loads(payload)["training_metadata"]

    assert isinstance(decoded, TrainingMetadata)
    assert decoded.training_metadata == expected_pairs
def get_projects(self):
    """Load every project found under the repository's root directory.

    Returns
    -------
    list of rubicon.domain.Project
        The list of projects from the filesystem, or an empty list if
        listing or reading raises `FileNotFoundError`.
    """
    try:
        paths = self._ls_directories_only(self.root_dir)
        raw_by_path = self.filesystem.cat(paths)
        return [domain.Project(**json.loads(raw)) for raw in raw_by_path.values()]
    except FileNotFoundError:
        return []
async def get_projects(self):
    """Overrides `rubicon.repository.BaseRepository.get_projects` to
    asynchronously load every project found under the root directory.

    Returns
    -------
    list of rubicon.domain.Project
        The list of projects from the filesystem, or an empty list if
        listing or reading raises `FileNotFoundError`.
    """
    try:
        paths = await self._ls_directories_only(self.root_dir)
        # Read all metadata files concurrently.
        reads = [self.filesystem._cat_file(path) for path in paths]
        payloads = await asyncio.gather(*reads)
        found = [domain.Project(**json.loads(payload)) for payload in payloads]
    except FileNotFoundError:
        found = []

    return found
def get_project(self, project_name):
    """Retrieve a project from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve.

    Returns
    -------
    rubicon.domain.Project
        The project with name `project_name`.

    Raises
    ------
    RubiconException
        If reading the project's metadata file raises
        `FileNotFoundError`.
    """
    project_metadata_path = self._get_project_metadata_path(project_name)

    try:
        raw_project = self.filesystem.cat(project_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No project with name '{project_name}' found.") from err

    return domain.Project(**json.loads(raw_project))
async def get_tags(self, project_name, experiment_id=None, dataframe_id=None):
    """Overrides `rubicon.repository.BaseRepository.get_tags` to
    asynchronously read the tag history of an object from the
    configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project the tagged object belongs to.
    experiment_id : str, optional
        The ID of the experiment to retrieve tags from.
    dataframe_id : str, optional
        The ID of the dataframe to retrieve tags from.

    Returns
    -------
    list of dict
        A list of dictionaries with one key each, `added_tags` or
        `removed_tags`, where the value is a list of tag names that
        have been added to or removed from the specified object. The
        list is empty when no listed entry's name contains `/tags_`.
    """
    root = self._get_tag_metadata_root(project_name, experiment_id, dataframe_id)

    listing = await self.filesystem._lsdir(root)
    tag_entries = [entry for entry in listing if "/tags_" in entry["name"]]
    if not tag_entries:
        return []

    # `_sort_tag_paths` yields pairs; only the second item (the path) is used.
    reads = [self.filesystem._cat_file(path) for _, path in self._sort_tag_paths(tag_entries)]
    raw_tag_data = await asyncio.gather(*reads)

    return [json.loads(raw) for raw in raw_tag_data]
async def get_project(self, project_name):
    """Overrides `rubicon.repository.BaseRepository.get_project` to
    asynchronously retrieve a project from the configured filesystem.

    Parameters
    ----------
    project_name : str
        The name of the project to retrieve.

    Returns
    -------
    rubicon.domain.Project
        The project with name `project_name`.

    Raises
    ------
    RubiconException
        If reading the project's metadata file raises
        `FileNotFoundError`.
    """
    project_metadata_path = self._get_project_metadata_path(project_name)

    try:
        raw_project = await self.filesystem._cat_file(project_metadata_path)
    except FileNotFoundError as err:
        # Chain explicitly so the filesystem error is kept as the cause.
        raise RubiconException(f"No project with name '{project_name}' found.") from err

    return domain.Project(**json.loads(raw_project))
def test_can_deserialize_datetime():
    """A payload tagged `datetime` decodes back to the original value."""
    timestamp = datetime.utcnow()
    # Formatting a datetime with an empty spec yields the same text as `str()`.
    payload = f'{{"date": {{"_type": "datetime", "value": "{timestamp}"}}}}'

    decoded = json.loads(payload)

    assert decoded["date"] == timestamp
def test_can_deserialize_set():
    """A payload tagged `set` decodes to a `set` of its values."""
    payload = '{"tags": {"_type": "set", "value": ["tag-b", "tag-a"]}}'
    expected_tags = {"tag-a", "tag-b"}

    decoded = json.loads(payload)

    assert decoded["tags"] == expected_tags