def test_log_dataframe(tmpdir, df_lending_club):
    """Profile the lending-club frame and check summary size and file count.

    Asserts that the flat summary of the logged profile has 151 entries and
    that 4 files exist under the temporary ``whylogs`` directory afterwards.

    NOTE(review): another ``test_log_dataframe`` is defined further down; if
    both live in the same module the later definition shadows this one.
    """
    out_dir = tmpdir.mkdir("whylogs")

    writer_config = WriterConfig("local", ["protobuf", "flat"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with session_from_config(session_config) as session:
        with session.logger("lendingclub") as logger:
            assert logger is not None
            logger.log_dataframe(df_lending_club)

            logged_profile = logger.profile
            assert logged_profile is not None

            flat_summary = logged_profile.flat_summary()["summary"]
            assert len(flat_summary) == 151

    # Flatten every file name found below the output directory.
    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 4
def test_log_rotation_minutes(tmpdir):
    """Log frames across two 2-minute clock jumps with minute rotation.

    The clock is frozen, four generated frames are logged with two
    ``tick`` calls of two minutes in between, and the test asserts that
    3 files exist under the output directory afterwards.
    """
    output_path = tmpdir.mkdir("whylogs")
    # Remove the freshly created directory again before anything is written.
    shutil.rmtree(output_path)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        # Scope the session with a context manager so it is closed on exit,
        # matching the other tests in this file that use ``with``.
        with session_from_config(session_config) as session:
            with session.logger("test", with_rotation_time='m', cache=1) as logger:
                logger.log_dataframe(util.testing.makeDataFrame())
                frozen_time.tick(delta=datetime.timedelta(minutes=2))
                logger.log_dataframe(util.testing.makeDataFrame())
                logger.log_dataframe(util.testing.makeDataFrame())
                frozen_time.tick(delta=datetime.timedelta(minutes=2))
                logger.log_dataframe(util.testing.makeDataFrame())

    output_files = [name for _, _, names in os.walk(output_path) for name in names]
    assert len(output_files) == 3
    shutil.rmtree(output_path)
def test_segments_with_rotation(df_lending_club, df, tmpdir):
    """Log a segmented dataset with per-second rotation and count the files.

    The lending-club frame is logged twice with a one-second clock tick after
    each call; logging ``df`` afterwards is expected to raise ``KeyError``.
    The test then asserts that 8 files exist under the output directory.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    one_second = {"seconds": 1}
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        session = session_from_config(session_config)
        logger_cm = session.logger(
            "test",
            with_rotation_time="s",
            segments=["home_ownership"],
            profile_full_dataset=True,
            cache_size=1,
        )
        with logger_cm as logger:
            logger.log_dataframe(df_lending_club)
            frozen_time.tick(delta=datetime.timedelta(**one_second))
            logger.log_dataframe(df_lending_club)
            frozen_time.tick(delta=datetime.timedelta(**one_second))

            with pytest.raises(KeyError):
                logger.log_dataframe(df)
        session.close()

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 8
    shutil.rmtree(out_dir, ignore_errors=True)
def test_log_multiple_calls(tmpdir, df_lending_club):
    """Log the same frame under five dataset timestamps and count the files.

    The working directory is switched to ``script_dir`` for the duration of
    the test and restored afterwards, even when an assertion fails.
    """
    # os.getcwd() records the actual directory; os.curdir is only the
    # constant "." and cannot be used to get back to where we started.
    original_dir = os.getcwd()
    os.chdir(script_dir)
    try:
        p = tmpdir.mkdir("whylogs")

        writer_config = WriterConfig(
            "local",
            ["protobuf", "flat"],
            p.realpath(),
            filename_template="dataset_summary-$dataset_timestamp",
        )
        yaml_data = writer_config.to_yaml()
        # Round-trip the writer config through YAML; the result is not used further.
        WriterConfig.from_yaml(yaml_data)

        session_config = SessionConfig("project", "pipeline", writers=[writer_config])
        now = datetime.datetime.now()
        # The context manager closes the session even if a logging call raises.
        with session_from_config(session_config) as session:
            for i in range(5):
                with session.logger(
                    dataset_timestamp=now + datetime.timedelta(days=i),
                    with_rotation_time=None,
                ) as logger:
                    logger.log_dataframe(df_lending_club)

        output_files = [name for _, _, names in os.walk(p) for name in names]
        # we run 5 times, so we should have five times more files than the above test
        assert len(output_files) == 20
    finally:
        os.chdir(original_dir)
def test_log_metrics_with_boolean_labels(tmpdir):
    """Log boolean targets/predictions with scores and check the label count.

    Asserts that the logged profile carries a model profile whose confusion
    matrix has exactly 2 labels.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])

    targets = [True, False, True]
    predictions = [False, True, False]
    scores = [0.2, 0.5, 0.6]

    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger("metrics_test") as logger:
            logger.log_metrics(targets, predictions, scores)

            profile = logger.profile
            metrics_profile = profile.model_profile

            assert metrics_profile is not None
            assert len(metrics_profile.metrics.confusion_matrix.labels) == 2

    shutil.rmtree(output_path, ignore_errors=True)
def test_no_log_rotation(tmpdir):
    """Build a ``Logger`` directly with rotation disabled, log one record, flush.

    The test makes no assertions; it passes when none of the calls raise.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # The writer is built from the YAML round-tripped config, not the original.
    restored_config = WriterConfig.from_yaml(writer_config.to_yaml())
    local_writer = writer_from_config(restored_config)

    logger = Logger(
        session_id="",
        dataset_name="testing",
        writers=[local_writer],
        dataset_timestamp=datetime.datetime.now(tz=timezone.utc),
        with_rotation_time=None,
    )
    logger.log({"quick_test": 3})
    logger.flush()
def test_segments(df_lending_club, tmpdir):
    """Log with two explicit ``home_ownership`` segments and inspect the result.

    Asserts that no full profile is kept, that exactly two segmented profiles
    exist, that their ``"segment"`` tags equal the JSON dump of the RENT and
    MORTGAGE segment definitions (in that order), and that ``get_segment``
    returns the second profile for the MORTGAGE segment.

    NOTE(review): another ``test_segments`` is defined further down; if both
    live in the same module the later definition shadows this one.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    rent_segment = [{"key": "home_ownership", "value": "RENT"}]
    mortgage_segment = [{"key": "home_ownership", "value": "MORTGAGE"}]

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with session_from_config(session_config) as session:
        with session.logger(
            "test",
            segments=[rent_segment, mortgage_segment],
            cache_size=1,
        ) as logger:
            logger.log_dataframe(df_lending_club)
            full_profile = logger.profile
            profiles = logger.segmented_profiles
            fetched_mortgage = logger.get_segment(mortgage_segment)

    assert full_profile is None
    assert len(profiles) == 2

    # Segmented profiles are checked by their position in the mapping.
    profile_keys = list(profiles.keys())
    first_profile = profiles[profile_keys[0]]
    second_profile = profiles[profile_keys[1]]

    assert first_profile.tags["segment"] == json.dumps(rent_segment)
    assert second_profile.tags["segment"] == json.dumps(mortgage_segment)
    assert fetched_mortgage == second_profile

    shutil.rmtree(out_dir, ignore_errors=True)
def test_config_api(tmpdir):
    """Create a session from config objects and log an empty DataFrame.

    The test makes no assertions; it passes when none of the calls raise.
    """
    out_dir = tmpdir.mkdir("whylogs")

    writer_config = WriterConfig("local", ["protobuf", "flat"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[writer_config])
    )

    empty_frame = pd.DataFrame()
    with session.logger("test_name") as logger:
        logger.log_dataframe(empty_frame)

    session.close()
def test_segments_keys(df_lending_club, tmpdir):
    """Segment the lending-club frame on two columns and count the segments.

    Segments on ``emp_title`` and ``home_ownership`` and asserts that 47
    segmented profiles are produced.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger(
            "test", segments=["emp_title", "home_ownership"], cache_size=1
        ) as logger:
            logger.log_dataframe(df_lending_club)
            profiles = logger.segmented_profiles
            assert len(profiles) == 47

    shutil.rmtree(output_path, ignore_errors=True)
def test_log_multiple_segments(tmpdir):
    """Segment a small 9-row frame on ``x`` and ``y`` via ``log_segments``.

    Asserts that 9 segmented profiles are produced.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])

    df = pd.DataFrame(
        data={
            "x": [1, 2, 3, 1, 2, 3, 1, 2, 3],
            "y": [4, 5, 6, 5, 6, 4, 6, 4, 5],
            "z": [0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3],
        }
    )

    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger("image_test", segments=["x", "y"]) as logger:
            logger.log_segments(df)
            assert len(logger.segmented_profiles) == 9
def test_log_dataframe(tmpdir, df_lending_club):
    """Log the lending-club frame and count the files written.

    Uses the ``protobuf`` and ``flat`` formats and asserts that 5 files exist
    under the temporary ``whylogs`` directory afterwards.
    """
    p = tmpdir.mkdir("whylogs")

    writer_config = WriterConfig("local", ["protobuf", "flat"], p.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger("lendingclub") as logger:
            logger.log_dataframe(df_lending_club)

    output_files = [name for _, _, names in os.walk(p) for name in names]
    assert len(output_files) == 5
def test_log_rotation_hour(tmpdir, df):
    """Log across a three-hour clock jump with hourly rotation.

    Logs ``df``, advances the frozen clock by three hours, logs a single
    feature value and ``df`` again, then asserts that 2 files exist under the
    output directory.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    three_hours = datetime.timedelta(hours=3)
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time, \
            session_from_config(session_config) as session, \
            session.logger("test", with_rotation_time="h", cache_size=1) as logger:
        logger.log_dataframe(df)
        frozen_time.tick(delta=three_hours)
        logger.log(feature_name="E", value=4)
        logger.log_dataframe(df)

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 2
    shutil.rmtree(out_dir, ignore_errors=True)
def test_log_image(tmpdir, image_files):
    """Log every image path in ``image_files`` and check the column count.

    Asserts that the resulting profile has 19 columns.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger("image_test") as logger:
            for image_file_path in image_files:
                logger.log_image(image_file_path)

            profile = logger.profile
            columns = profile.columns
            assert len(columns) == 19

    shutil.rmtree(output_path, ignore_errors=True)
def test_log_pil_image(tmpdir, image_files):
    """Open each image with PIL, log it, and check the column count.

    The logger is created with per-second rotation and ``cache_size=1``;
    the test asserts that the resulting profile has 19 columns.
    """
    output_path = tmpdir.mkdir("whylogs")
    # Remove the freshly created directory again before anything is written.
    shutil.rmtree(output_path)

    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    # Scope the session with a context manager so it is closed on exit,
    # matching the other tests in this file that use ``with``.
    with session_from_config(session_config) as session:
        with session.logger(
            "image_pil_test", with_rotation_time="s", cache_size=1
        ) as logger:
            for image_file_path in image_files:
                img = Image.open(image_file_path)
                logger.log_image(img)

            profile = logger.profile
            columns = profile.columns
            assert len(columns) == 19

    shutil.rmtree(output_path)
def test_segments(df_lending_club, tmpdir):
    """Log with two explicit ``home_ownership`` segments and inspect the tags.

    Asserts that no full profile is kept, that exactly two segmented profiles
    exist, that every tag whose key starts with ``_TAG_PREFIX`` equals the
    expected segment value (RENT for the first profile, MORTGAGE for the
    second), and that ``get_segment`` returns the second profile for the
    MORTGAGE segment.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    writer_config = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the result is not used further.
    WriterConfig.from_yaml(writer_config.to_yaml())

    test_segments = [
        [{"key": "home_ownership", "value": "RENT"}],
        [{"key": "home_ownership", "value": "MORTGAGE"}],
    ]

    session_config = SessionConfig("project", "pipeline", writers=[writer_config])
    with session_from_config(session_config) as session:
        with session.logger("test", segments=test_segments, cache_size=1) as logger:
            logger.log_dataframe(df_lending_club)
            full_profile = logger.profile
            profiles = logger.segmented_profiles
            fetched_mortgage = logger.get_segment(test_segments[1])

    assert full_profile is None
    assert len(profiles) == 2

    # Segmented profiles are checked by their position in the mapping.
    profile_keys = list(profiles.keys())

    first_profile = profiles[profile_keys[0]]
    expected_first = test_segments[0][0][_TAG_VALUE]  # 'RENT'
    for tag_key in [k for k in first_profile.tags.keys() if k.startswith(_TAG_PREFIX)]:
        assert first_profile.tags[tag_key] == expected_first

    second_profile = profiles[profile_keys[1]]
    expected_second = test_segments[1][0][_TAG_VALUE]  # 'MORTGAGE'
    for tag_key in [k for k in second_profile.tags.keys() if k.startswith(_TAG_PREFIX)]:
        assert second_profile.tags[tag_key] == expected_second

    assert fetched_mortgage == second_profile

    shutil.rmtree(out_dir, ignore_errors=True)