Exemplo n.º 1
0
def test_segments_with_rotation(df_lending_club, tmpdir):
    """Log a segmented dataframe across per-second rotations and count the output files.

    The lending-club dataframe is logged twice, with the frozen clock advanced
    one second after each call. A dataframe from ``util.testing.makeDataFrame()``
    is then expected to raise ``KeyError`` (presumably because it lacks the
    ``home_ownership`` segment column -- confirm against the logger).
    Finally, the test asserts that eight files were written under the output
    directory.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    logger_options = dict(
        with_rotation_time='s',
        segments=["home_ownership"],
        profile_full_dataset=True,
        cache=1,
    )
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        session = session_from_config(session_config)
        with session.logger("test", **logger_options) as logger:
            # Two log calls, each followed by a one-second clock tick.
            for _ in range(2):
                logger.log_dataframe(df_lending_club)
                frozen_time.tick(delta=datetime.timedelta(seconds=1))

            unrelated_df = util.testing.makeDataFrame()
            with pytest.raises(KeyError):
                logger.log_dataframe(unrelated_df)

    written = [name for _, _, names in os.walk(output_path) for name in names]
    assert len(written) == 8
    shutil.rmtree(output_path)
Exemplo n.º 2
0
def test_log_rotation_days(tmpdir):
    """Log dataframes across day boundaries with daily rotation and count the output files.

    One dataframe is logged at the frozen start time, two more after the clock
    advances one day, and a final one after it advances two further days.
    The test asserts that three files were written under the output directory.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        session = session_from_config(session_config)
        with session.logger("test", with_rotation_time='d', cache_size=1) as logger:
            logger.log_dataframe(util.testing.makeDataFrame())

            frozen_time.tick(delta=datetime.timedelta(days=1))
            for _ in range(2):
                logger.log_dataframe(util.testing.makeDataFrame())

            frozen_time.tick(delta=datetime.timedelta(days=2))
            logger.log_dataframe(util.testing.makeDataFrame())

    written = [name for _, _, names in os.walk(output_path) for name in names]
    assert len(written) == 3
    shutil.rmtree(output_path)
Exemplo n.º 3
0
def test_log_dataframe(tmpdir, df_lending_club):
    """Log the lending-club dataframe once and check the profile summary and output files.

    Asserts that the flat summary's ``'summary'`` entry has length 151 and
    that five files were written by the ``protobuf`` + ``flat`` local writer.
    """
    output_dir = tmpdir.mkdir("whylogs")

    writer_config = WriterConfig("local", ["protobuf", "flat"], output_dir.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    with session.logger("lendingclub") as logger:
        assert logger is not None
        logger.log_dataframe(df_lending_club)

        profile = logger.profile
        assert profile is not None

        assert len(profile.flat_summary()['summary']) == 151

    written = [name for _, _, names in os.walk(output_dir) for name in names]
    assert len(written) == 5
Exemplo n.º 4
0
def test_log_metrics(tmpdir):
    """Log classification metrics with string labels and check the confusion-matrix labels.

    Three distinct class names appear across targets and predictions, and the
    test asserts that the confusion matrix of the model profile holds three
    labels.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    targets = ["class_name1", "class_name2", "class_name3"]
    predictions = ["class_name1", "class_name2", "class_name2"]
    scores = [0.2, 0.5, 0.6]
    expected_label_count = 3

    with session.logger("metrics_test") as logger:
        logger.log_metrics(targets, predictions, scores)

        metrics_profile = logger.profile.model_profile
        assert metrics_profile is not None

        labels = metrics_profile.metrics.confusion_matrix.labels
        assert len(labels) == expected_label_count
    shutil.rmtree(output_path)
Exemplo n.º 5
0
def test_log_multiple_calls(tmpdir, df_lending_club):
    """Log the same dataframe through five loggers with distinct dataset timestamps.

    The writer uses a filename template keyed on ``$dataset_timestamp``, and
    each logger gets a timestamp one day later than the previous one. The test
    asserts that 25 files in total are written under the output directory.

    The working directory is switched to ``script_dir`` for the duration of
    the test and restored afterwards, even if an assertion fails.
    """
    # os.getcwd() captures the real directory; os.curdir is just the string "."
    # and chdir(".") would not restore anything.
    original_dir = os.getcwd()
    os.chdir(script_dir)
    try:
        p = tmpdir.mkdir("whylogs")

        writer_config = WriterConfig(
            "local",
            ["protobuf", "flat"],
            p.realpath(),
            filename_template="dataset_summary-$dataset_timestamp",
        )
        # Round-trip the writer config through YAML; only the absence of errors matters.
        yaml_data = writer_config.to_yaml()
        WriterConfig.from_yaml(yaml_data)

        session_config = SessionConfig(
            "project", "pipeline", writers=[writer_config])
        session = session_from_config(session_config)

        now = datetime.datetime.now()
        for i in range(5):
            with session.logger(dataset_timestamp=now + datetime.timedelta(days=i)) as logger:
                logger.log_dataframe(df_lending_club)

        output_files = [name for _, _, names in os.walk(p) for name in names]
        # Five logging runs; the expected total is 25 files (five per run).
        assert len(output_files) == 25
    finally:
        # Always put the process back where it started so later tests are unaffected.
        os.chdir(original_dir)
Exemplo n.º 6
0
def test_log_pil_image(tmpdir, image_files):
    """Log PIL images through a rotating logger and check the expected profile columns.

    Each path in ``image_files`` is opened with ``Image.open`` and passed to
    ``logger.log_image``. The test then asserts that every name in
    ``_EXPECTED_COLUMNS`` is present in the profile's columns.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    yaml_data = writer_config.to_yaml()
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project",
                                   "pipeline",
                                   writers=[writer_config])

    session = session_from_config(session_config)

    with session.logger("image_pil_test",
                        with_rotation_time="1m",
                        cache_size=1) as logger:

        for image_file_path in image_files:
            # Image.open keeps the file handle open until the image is closed;
            # the context manager releases it as soon as the image is logged.
            with Image.open(image_file_path) as img:
                logger.log_image(img)

        profile = logger.profile
        columns = profile.columns
        for column_name in _EXPECTED_COLUMNS:
            assert column_name in columns, f"{column_name} not found in {columns}"
    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 7
0
def test_log_metrics_with_boolean_labels(tmpdir):
    """Log classification metrics with boolean labels and check the confusion-matrix labels.

    Targets and predictions only contain ``True`` and ``False``, and the test
    asserts that the confusion matrix of the model profile holds two labels.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    targets = [True, False, True]
    predictions = [False, True, False]
    scores = [0.2, 0.5, 0.6]

    with session.logger("metrics_test") as logger:
        logger.log_metrics(targets, predictions, scores)

        metrics_profile = logger.profile.model_profile
        assert metrics_profile is not None

        labels = metrics_profile.metrics.confusion_matrix.labels
        assert len(labels) == 2
    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 8
0
def test_segments(df_lending_club, tmpdir):
    """Log a dataframe with two explicit ``home_ownership`` segments and inspect the results.

    Asserts that no full-dataset profile is exposed, that exactly two
    segmented profiles exist, that their ``"segment"`` tags hold the JSON
    encoding of the RENT and MORTGAGE segment definitions (in that order),
    and that ``get_segment`` for MORTGAGE returns the second profile.
    """

    def home_ownership(value):
        # Build a fresh single-key segment definition for the given value.
        return [{"key": "home_ownership", "value": value}]

    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with session_from_config(session_config) as session:
        requested_segments = [home_ownership("RENT"), home_ownership("MORTGAGE")]
        with session.logger("test", segments=requested_segments, cache_size=1) as logger:
            logger.log_dataframe(df_lending_club)
            profile = logger.profile
            profiles = logger.segmented_profiles
            mortgage_profile = logger.get_segment(home_ownership("MORTGAGE"))

    assert profile is None
    assert len(profiles) == 2

    # Relies on the profiles mapping preserving the order of the requested segments.
    segment_ids = list(profiles.keys())
    assert profiles[segment_ids[0]].tags["segment"] == json.dumps(home_ownership("RENT"))
    assert profiles[segment_ids[1]].tags["segment"] == json.dumps(home_ownership("MORTGAGE"))

    assert mortgage_profile == profiles[segment_ids[1]]

    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 9
0
def test_config_api(tmpdir):
    """Smoke-test the config API: build a session from config and log an empty dataframe.

    The test makes no assertions; it passes if nothing raises.
    """
    output_dir = tmpdir.mkdir("whylogs")

    writer_config = WriterConfig("local", ["protobuf", "flat"], output_dir.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    empty_frame = pd.DataFrame()
    with session.logger("test_name") as logger:
        logger.log_dataframe(empty_frame)
    session.close()
Exemplo n.º 10
0
def test_segments_keys(df_lending_club, tmpdir):
    """Segment the lending-club dataframe by two column names and count the segments.

    Segments are requested by key only (``emp_title`` and ``home_ownership``),
    and the test asserts that 47 segmented profiles result.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )
    segment_columns = ["emp_title", "home_ownership"]
    with session.logger("test", segments=segment_columns, cache_size=1) as logger:
        logger.log_dataframe(df_lending_club)
        segment_count = len(logger.segmented_profiles)
        assert segment_count == 47
    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 11
0
def test_log_multiple_segments(tmpdir):
    """Segment a small dataframe on two columns via ``log_segments`` and count the segments.

    The frame holds nine rows, each with a distinct ``(x, y)`` pair, and the
    test asserts that nine segmented profiles are produced.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    columns = {
        "x": [1, 2, 3, 1, 2, 3, 1, 2, 3],
        "y": [4, 5, 6, 5, 6, 4, 6, 4, 5],
        "z": [0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3],
    }
    df = pd.DataFrame(data=columns)
    with session.logger("image_test", segments=["x", "y"]) as logger:
        logger.log_segments(df)
        segment_count = len(logger.segmented_profiles)
        assert segment_count == 9
Exemplo n.º 12
0
def test_no_log_rotation(tmpdir):
    """Construct a ``Logger`` directly with rotation disabled, log one value and flush.

    The test makes no assertions; it passes if nothing raises.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # The writer is built from the YAML round-trip of the config, not the original object.
    restored_config = WriterConfig.from_yaml(writer_config.to_yaml())
    basewriter = writer_from_config(restored_config)

    logger = Logger(
        session_id="",
        dataset_name="testing",
        writers=[basewriter],
        dataset_timestamp=datetime.datetime.now(tz=timezone.utc),
        with_rotation_time=None,
    )
    logger.log({"quick_test": 3})
    logger.flush()
Exemplo n.º 13
0
def test_log_rotation_hour(tmpdir, df):
    """Log across a three-hour clock jump with hourly rotation and count the output files.

    The dataframe is logged once, the frozen clock advances three hours, then
    a single feature value and the dataframe are logged again. The test
    asserts that two files were written under the output directory.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        with session_from_config(session_config) as session:
            logger_options = dict(with_rotation_time="h", cache_size=1)
            with session.logger("test", **logger_options) as logger:
                logger.log_dataframe(df)

                frozen_time.tick(delta=datetime.timedelta(hours=3))

                logger.log(feature_name="E", value=4)
                logger.log_dataframe(df)

    written = [name for _, _, names in os.walk(output_path) for name in names]
    assert len(written) == 2
    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 14
0
def test_log_image(tmpdir, image_files):
    """Log images by file path and check the number of profile columns.

    Each path in ``image_files`` is passed directly to ``logger.log_image``,
    and the test asserts that the resulting profile has 19 columns.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    with session.logger("image_test") as logger:
        for path in image_files:
            logger.log_image(path)

        column_count = len(logger.profile.columns)
        assert column_count == 19
    shutil.rmtree(output_path, ignore_errors=True)
Exemplo n.º 15
0
def test_log_pil_image(tmpdir, image_files):
    """Log PIL images through a per-second rotating logger and check the column count.

    Each path in ``image_files`` is opened with ``Image.open`` and passed to
    ``logger.log_image``. The test asserts that the resulting profile has 19
    columns.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    yaml_data = writer_config.to_yaml()
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project",
                                   "pipeline",
                                   writers=[writer_config])

    session = session_from_config(session_config)

    with session.logger("image_pil_test", with_rotation_time="s",
                        cache_size=1) as logger:

        for image_file_path in image_files:
            # Image.open keeps the file handle open until the image is closed;
            # the context manager releases it as soon as the image is logged.
            with Image.open(image_file_path) as img:
                logger.log_image(img)

        profile = logger.profile
        columns = profile.columns
        assert len(columns) == 19
    shutil.rmtree(output_path)
Exemplo n.º 16
0
def test_segments(df_lending_club, tmpdir):
    """Log a dataframe with two explicit ``home_ownership`` segments and check per-segment tags.

    Asserts that no full-dataset profile is exposed, that exactly two
    segmented profiles exist, that every tag whose name starts with
    ``_TAG_PREFIX`` carries the matching segment value (RENT for the first
    profile, MORTGAGE for the second), and that ``get_segment`` for MORTGAGE
    returns the second profile.
    """

    def assert_segment_tags(segment_profile, segment):
        # Every prefixed tag on the profile must equal the segment's value.
        # If the profile has no prefixed tags, nothing is checked.
        prefixed = [key for key in segment_profile.tags.keys() if key.startswith(_TAG_PREFIX)]
        for tag_key in prefixed:
            assert segment_profile.tags[tag_key] == segment[0][_TAG_VALUE]

    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    # Round-trip the writer config through YAML; only the absence of errors matters.
    WriterConfig.from_yaml(writer_config.to_yaml())

    rent, mortgage = (
        [{"key": "home_ownership", "value": "RENT"}],
        [{"key": "home_ownership", "value": "MORTGAGE"}],
    )
    test_segments = [rent, mortgage]

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with session_from_config(session_config) as session:
        with session.logger("test", segments=test_segments, cache_size=1) as logger:
            logger.log_dataframe(df_lending_club)
            profile = logger.profile
            profiles = logger.segmented_profiles
            mortgage_profile = logger.get_segment(test_segments[1])

    assert profile is None
    assert len(profiles) == 2

    # Relies on the profiles mapping preserving the order of the requested segments.
    segment_ids = list(profiles.keys())
    assert_segment_tags(profiles[segment_ids[0]], test_segments[0])  # 'RENT'
    assert_segment_tags(profiles[segment_ids[1]], test_segments[1])  # 'MORTGAGE'

    assert mortgage_profile == profiles[segment_ids[1]]
    shutil.rmtree(output_path, ignore_errors=True)