Пример #1
0
def get_or_create_session(path_to_config: Optional[str] = None, report_progress: Optional[bool] = False):
    """
    Return the global session, creating it first when none is active.

    An already-active global session is returned as-is and no config is
    loaded. Otherwise the config is loaded from ``path_to_config``; when
    ``load_config`` returns ``None`` a warning is printed and a built-in
    default config (local writer and local metadata, both writing to
    ``output``) is used instead.

    :param path_to_config: Path handed to ``load_config``
    :type path_to_config: str
    :param report_progress: When not ``None``, overrides ``report_progress``
        on the config used to build the new session
    :type report_progress: bool
    :return: The global active session
    :rtype: Session
    """
    global _session
    session_is_live = _session is not None and _session.is_active()
    if session_is_live:
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
        return _session

    config = load_config(path_to_config)
    if config is None:
        print("WARN: Missing config")

        fallback_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        fallback_metadata = MetadataConfig(type="local", output_path="output", input_path="")
        config = SessionConfig(
            "default-project",
            "default-pipeline",
            [fallback_writer],
            fallback_metadata,
            False,
        )
    if report_progress is not None:
        config.report_progress = report_progress

    _session = session_from_config(config)
    return _session
Пример #2
0
def test_s3_writer_metadata(df_lending_club, moto_boto,
                            s3_all_config_metadata_path):
    """
    Check the bucket keys produced by segment estimation and then by
    dataframe logging, using the config at ``s3_all_config_metadata_path``.
    """
    assert os.path.exists(s3_all_config_metadata_path)

    session = session_from_config(load_config(s3_all_config_metadata_path))
    session.estimate_segments(
        df_lending_club,
        name="dataset_test",
        target_field="funded_amnt_inv",
        max_segments=30,
    )
    s3 = boto3.client("s3")

    # Right after segment estimation every listed object must be the
    # segments metadata file.
    listing = s3.list_objects(Bucket="mocked_bucket")
    for entry in listing["Contents"]:
        assert entry["Key"] == "metadata/segments.json"

    with session.logger("dataset_test") as logger:
        logger.log_dataframe(df_lending_club)
    session.close()

    # After logging, keys are compared position by position with the
    # expected list.
    listing = s3.list_objects(Bucket="mocked_bucket")
    print(listing)
    for position, entry in enumerate(listing["Contents"]):
        key = entry["Key"]
        print(key)
        assert key == object_keys_meta_config[position]
Пример #3
0
def get_or_create_session():
    """
    Return the global session, building one when none is active.

    When the global session is missing or inactive, the config is loaded
    with ``load_config()``; if that returns ``None`` a warning is printed
    and a default config with a single local writer is used. An active
    session is returned unchanged and no config is loaded.

    Returns
    -------
    session : Session
        The global active session
    """
    global _session
    if _session is None or not _session.is_active():
        config = load_config()
        if config is None:
            print("WARN: Missing config")
            config = SessionConfig(
                "default-project",
                "default-pipeline",
                [WriterConfig(type="local", output_path="output", formats=["all"])],
                False,
            )
        _session = session_from_config(config)
    else:
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
    return _session
Пример #4
0
def reset_default_session():
    """
    Close the current global session, if any, and replace it with a new one
    built from the loaded config.

    :raises EnvironmentError: if ``load_config()`` returns ``None``
    """
    global _session
    previous = _session
    if previous is not None:
        previous.close()

    fresh_config: SessionConfig = load_config()
    if fresh_config is None:
        raise EnvironmentError("Unable to load whylogs config")

    _session = session_from_config(fresh_config)
Пример #5
0
def reset_default_session():
    """
    Close the current global session, if any, and replace it with a new one.

    The new session is built from ``load_config()``; when that returns
    ``None`` a default config with a single local writer is used instead.
    """
    global _session
    previous = _session
    if previous is not None:
        previous.close()

    fresh_config: SessionConfig = load_config()
    if fresh_config is None:
        local_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        fresh_config = SessionConfig(
            "default-project",
            "default-pipeline",
            [local_writer],
            False,
        )

    _session = session_from_config(fresh_config)
Пример #6
0
def test_s3_writer_transport_config(s3_transport_config_path):
    """
    Check that the first writer's transport parameters are loaded from the
    config file with the expected values.
    """
    assert os.path.exists(s3_transport_config_path)
    config = load_config(s3_transport_config_path)

    transport = config.writers[0].transport_parameters
    assert transport.endpoint_url == "http://127.0.0.1:5000/"
    assert transport.aws_access_key_id == AWS_ACCESS_KEY_ID
    assert transport.aws_secret_access_key == AWS_SECRET_ACCESS_KEY
    assert transport.region_name == DEFAULT_REGION_NAME
    assert transport.verify == "path/to/file"
Пример #7
0
def test_s3_writer(df_lending_club, moto_boto, s3_all_config_path):
    """
    Log a dataframe through a session built from ``s3_all_config_path`` and
    compare the resulting bucket keys, in order, with ``object_keys``.
    """
    assert os.path.exists(s3_all_config_path)

    session = session_from_config(load_config(s3_all_config_path))

    with session.logger("dataset_test_s3") as logger:
        logger.log_dataframe(df_lending_club)
    session.close()

    listing = boto3.client("s3").list_objects(Bucket="mocked_bucket")
    for position, entry in enumerate(listing["Contents"]):
        assert entry["Key"] == object_keys[position]
Пример #8
0
def test_mlflow_patched(mlflow_config_path):
    """
    Enabling the mlflow integration returns a truthy value and makes
    ``mlflow.whylogs`` available.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    patched = whylogs.enable_mlflow(session)
    assert patched
    assert mlflow.whylogs is not None
    print("HEY LISTEN")
    whylogs.mlflow.disable_mlflow()
Пример #9
0
def test_profile_viewer(tmpdir, local_config_path):
    """
    Build a small profile and check that ``profile_viewer`` writes the
    report file and returns the path it was given.
    """
    session = session_from_config(load_config(local_config_path))

    timestamp = datetime.datetime(2021, 6, 2)
    with session.logger("mytestytest", dataset_timestamp=timestamp) as logger:
        for _ in range(5):
            logger.log({"uniform_integers": np.random.randint(0, 50)})
            logger.log({"nulls": None})

        profile = logger.profile

    report_path = tmpdir + "my_test.html"
    result = profile_viewer(profiles=[profile], output_path=report_path)
    assert os.path.exists(report_path)
    assert result == report_path
Пример #10
0
def test_s3_writer(df_lending_club, moto_boto, s3_config_path):
    """
    Log a dataframe through a session built from ``s3_config_path`` and
    check that exactly one object, the protobuf dataset summary, is in the
    bucket and that no local ``s3:`` directory exists in the working
    directory.
    """
    assert os.path.exists(s3_config_path)

    session = session_from_config(load_config(s3_config_path))

    with session.logger("dataset_test_s3") as logger:
        logger.log_dataframe(df_lending_club)

    listing = boto3.client("s3").list_objects(Bucket="mocked_bucket")

    keys = [entry["Key"] for entry in listing["Contents"]]
    assert len(keys) == 1
    assert keys[0] == "dataset_test_s3/dataset_summary/protobuf/dataset_summary.bin"

    local_dirs = [item.name for item in os.scandir(os.getcwd()) if item.is_dir()]
    assert "s3:" not in local_dirs
0
def test_get_run_profiles_shouldReturn_multipleProfiles(
        tmpdir, mlflow_config_path):
    """
    Log profiles in two mlflow runs and check that they can be retrieved
    per run and per experiment, for the default dataset name and for
    ``"another-profile"``.

    The whylogs mlflow patch is removed in a ``finally`` block so it does
    not stay installed for tests that run afterwards, whether or not the
    assertions pass.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    config = load_config(mlflow_config_path)
    session = session_from_config(config)

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    # Cleanup is only registered once the patch has been enabled.
    try:
        # First run: one log without a dataset name, one to "another-profile".
        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1})
            mlflow.whylogs.log(features={"a": 1},
                               dataset_name="another-profile")

        # Second run: a single log to "another-profile".
        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1},
                               dataset_name="another-profile")

        runs = whylogs.mlflow.list_whylogs_runs("0")
        default_profiles = whylogs.mlflow.get_run_profiles(
            run_id=runs[0].run_id)
        another_profile = whylogs.mlflow.get_run_profiles(
            run_id=runs[0].run_id, dataset_name="another-profile")

        assert len(runs) == 2
        # verify the number of profiles for each datasetname
        assert len(
            whylogs.mlflow.get_experiment_profiles(
                "0", dataset_name="default")) == 2
        assert len(
            whylogs.mlflow.get_experiment_profiles(
                "0", dataset_name="another-profile")) == 2

        # for the first entry in `runs`, verify content
        assert len(default_profiles) == 1
        assert len(another_profile) == 1
        # assert default_profiles[0].name == "default"
        assert default_profiles[0].dataset_timestamp is not None
        assert another_profile[0].dataset_timestamp is not None
    finally:
        whylogs.mlflow.disable_mlflow()
Пример #12
0
def test_patch_multiple_times(mlflow_config_path):
    """
    Enabling the mlflow integration repeatedly on the same session returns
    a truthy value each time and leaves ``mlflow.whylogs`` available.
    """
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    # patch three times
    for _ in range(3):
        assert whylogs.enable_mlflow(session)

    import mlflow

    assert mlflow.whylogs is not None
    whylogs.mlflow.disable_mlflow()
Пример #13
0
def test_assert_whylogsrun_close_is_called(tmpdir, mlflow_config_path):
    """
    With the integration enabled, completing one mlflow run calls
    ``WhyLogsRun._close`` exactly once.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    set_up_mlflow(mlflow, tmpdir)
    run_cls = whylogs.mlflow.patcher.WhyLogsRun
    with mock.patch.object(run_cls, "_close") as close_spy:
        whylogs.enable_mlflow(session)
        # An empty run is enough: only the close hook is under test.
        with mlflow.start_run():
            pass

        close_spy.assert_called_once()
    whylogs.mlflow.disable_mlflow()
Пример #14
0
def test_assert_log_artifact_is_called(tmpdir, mlflow_config_path):
    """
    With the integration enabled, a run that logs features through
    ``mlflow.whylogs`` calls ``mlflow.log_artifact`` exactly once.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    set_up_mlflow(mlflow, tmpdir)
    with mock.patch.object(mlflow, "log_artifact") as artifact_spy:
        whylogs.enable_mlflow(session)
        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1})

        artifact_spy.assert_called_once()

    whylogs.mlflow.disable_mlflow()
Пример #15
0
def test_listRuns_shouldReturn_NoRuns(tmpdir, mlflow_config_path):
    """
    Ten mlflow runs that never log through whylogs are listed by mlflow,
    but none of them is reported as a whylogs run.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    total_runs = 10
    for _ in range(total_runs):
        with mlflow.start_run():
            pass

    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 0
    whylogs.mlflow.disable_mlflow()
Пример #16
0
def session_from_config(config: SessionConfig = None,
                        config_path: Optional[str] = "") -> Session:
    """
    Build a whylogs ``Session`` from a ``SessionConfig``.

    When ``config`` is ``None`` it is loaded from ``config_path`` via
    ``load_config``. One writer is created per entry in ``config.writers``;
    a metadata writer is created only when ``config.metadata`` is truthy,
    otherwise ``None`` is passed to the session.
    """
    if config is None:
        config = load_config(config_path)

    writers = [writer_from_config(writer_cfg) for writer_cfg in config.writers]
    metadata_writer = (
        metadata_from_config(config.metadata) if config.metadata else None
    )

    return Session(
        config.project,
        config.pipeline,
        writers,
        metadata_writer,
        config.verbose,
        config.with_rotation_time,
        config.cache_size,
        report_progress=config.report_progress,
    )
Пример #17
0
def test_listRuns_shouldReturn_CorrectRunCount(tmpdir, mlflow_config_path):
    """
    Of ten mlflow runs, only the even-indexed ones log through whylogs;
    exactly those five are reported as whylogs runs and yield profiles.
    """
    import mlflow

    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    for run_index in range(10):
        logs_profile = run_index % 2 == 0
        with mlflow.start_run():
            if logs_profile:
                mlflow.whylogs.log(features={"a": 1})
    print("WEIRD")
    assert len(mlflow.list_run_infos("0")) == 10
    assert len(whylogs.mlflow.list_whylogs_runs("0")) == 5
    assert len(whylogs.mlflow.get_experiment_profiles("0")) == 5
    whylogs.mlflow.disable_mlflow()