def get_run_profiles(run_id: str, dataset_name: str = "default", client=None):
    """
    Retrieve all whylogs DatasetProfile for a given run and a given dataset name.

    Profiles are read only when the artifact listing for the dataset contains
    exactly one entry and that entry is a file; in every other case an empty
    list is returned.

    :param run_id: the run id
    :param dataset_name: the dataset name within a run. If not set, use the default value "default"
    :param client: :py:class:`mlflow.tracking.MlflowClient`. If ``None``, a new client is created.
    :return: the profiles parsed from the downloaded artifact, or an empty list
    :rtype: :py:class:`typing.List[whylogs.DatasetProfile]`
    """
    import tempfile

    import mlflow
    from whylogs import DatasetProfile

    if client is None:
        client = mlflow.tracking.MlflowClient()

    artifacts = client.list_artifacts(run_id, path=f"{_WHYLOGS_PATH}/{dataset_name}")
    if len(artifacts) != 1 or artifacts[0].is_dir:
        return []

    # The context manager owns the temporary directory, so it is removed even
    # when the download itself raises (previously the directory leaked then).
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = client.download_artifacts(run_id, artifacts[0].path, tmp_dir)
        with open(output_file, "rb") as f:
            return list(DatasetProfile.parse_delimited(f.read()))
def test_write_template_path():
    """Check that the writer expands template variables in the path suffix and file name."""
    dataset_ts = time.from_utc_ms(9999)
    session_ts = time.from_utc_ms(88888)

    # Path template exercising every variable checked by the assertion below.
    template = "$name-$session_timestamp-$dataset_timestamp-$session_id"
    config = WriterConfig(
        "local",
        ["protobuf", "flat"],
        "output",
        template,
        "dataset-profile-$name",
    )
    writer = writer_from_config(config)

    profile = DatasetProfile("name", dataset_ts, session_ts, session_id="session")

    expected_suffix = "name-88888-9999-session"
    expected_file_name = "dataset-profile-name.txt"
    assert writer.path_suffix(profile) == expected_suffix
    assert writer.file_name(profile, ".txt") == expected_file_name