示例#1
0
def get_run_profiles(run_id: str, dataset_name: str = "default", client=None):
    """
    Retrieve all whylogs DatasetProfile for a given run and a given dataset name.

    The artifacts stored under ``{_WHYLOGS_PATH}/{dataset_name}`` for the run are
    listed. When that listing contains exactly one entry and it is not a
    directory, the artifact is downloaded into a temporary directory, read as
    bytes and parsed with ``DatasetProfile.parse_delimited``. In every other
    case an empty list is returned.

    :param run_id: the run id
    :param dataset_name: the dataset name within a run. If not set, use the default value "default"
    :param client: :py:class:`mlflow.tracking.MlflowClient`. If ``None``, a new
        ``mlflow.tracking.MlflowClient()`` is created.
    :return: the parsed profiles, or an empty list when the artifact listing is
        not a single file
    :rtype: :py:class:`typing.List[whylogs.DatasetProfile]`
    """

    import mlflow
    import tempfile
    from whylogs import DatasetProfile

    if client is None:
        client = mlflow.tracking.MlflowClient()

    artifacts = client.list_artifacts(run_id,
                                      path=f"{_WHYLOGS_PATH}/{dataset_name}")

    # Only a listing made of exactly one plain file is treated as a profile.
    if len(artifacts) != 1 or artifacts[0].is_dir:
        return []

    # The context manager owns the temporary directory for the whole
    # download + parse sequence, so it is removed even when the download
    # itself fails (previously only failures after the download were covered).
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = client.download_artifacts(run_id, artifacts[0].path,
                                                tmp_dir)
        with open(output_file, "rb") as f:
            # Materialize the result before the directory is cleaned up.
            return list(DatasetProfile.parse_delimited(f.read()))
示例#2
0
def test_write_template_path():
    """Check that a writer built from a config expands its path and file-name templates.

    A profile named "name" with session id "session" is created from the two
    timestamps below; the writer's ``path_suffix`` and ``file_name`` results
    are compared against the expected substituted strings.
    """
    dataset_ts = time.from_utc_ms(9999)
    session_ts = time.from_utc_ms(88888)

    config = WriterConfig(
        "local",
        ["protobuf", "flat"],
        "output",
        "$name-$session_timestamp-$dataset_timestamp-$session_id",
        "dataset-profile-$name",
    )
    configured_writer = writer_from_config(config)
    profile = DatasetProfile("name", dataset_ts, session_ts, session_id="session")

    suffix = configured_writer.path_suffix(profile)
    assert suffix == "name-88888-9999-session"

    txt_name = configured_writer.file_name(profile, ".txt")
    assert txt_name == "dataset-profile-name.txt"