def profile_lending_club():
    """Build a ``DatasetProfile`` from the bundled Lending Club sample CSV.

    Reads ``testdata/lending_club_1000.csv`` (resolved relative to the parent
    of ``_MY_DIR``), tracks the whole dataframe in a profile named ``"test"``
    with a freshly generated session id and session timestamp, and returns
    that profile.

    Returns:
        The ``DatasetProfile`` after ``track_dataframe`` has been called on
        the loaded dataframe.
    """
    import datetime
    from uuid import uuid4

    # NOTE(review): utcnow() yields a naive datetime and is deprecated since
    # Python 3.12 -- confirm DatasetProfile accepts aware timestamps before
    # switching to datetime.now(timezone.utc).
    started_at = datetime.datetime.utcnow()
    session = uuid4().hex

    csv_path = os.path.join(
        _MY_DIR, os.pardir, "testdata", "lending_club_1000.csv"
    )
    lending_frame = pd.read_csv(csv_path)

    result = DatasetProfile(
        name="test",
        session_id=session,
        session_timestamp=started_at,
    )
    result.track_dataframe(lending_frame)
    return result
def test_dataframe_profile():
    """Check ``dataframe_profile`` against a hand-built ``DatasetProfile``.

    A reference profile is created directly and fed the dataframe via
    ``track_dataframe``. The factory function is then called three ways
    (with name and datetime timestamp, with defaults only, and with an
    integer timestamp); each result must report the same variance-tracker
    mean for column ``"A"`` as the reference profile.
    """

    def variance_mean(dataset_profile):
        # Mean stored by the variance tracker of column "A".
        return dataset_profile.columns["A"].number_tracker.variance.mean

    stamp = datetime.datetime.now()
    frame = util.testing.makeDataFrame()

    # Reference profile built without the factory helper.
    reference = DatasetProfile("test", stamp)
    reference.track_dataframe(frame)

    # Factory call with an explicit name and a datetime timestamp.
    named_and_timed = dataframe_profile(frame, name="test", timestamp=stamp)
    assert variance_mean(named_and_timed) == variance_mean(reference)

    # Factory call relying entirely on default arguments.
    defaults_only = dataframe_profile(frame)
    assert variance_mean(defaults_only) == variance_mean(reference)

    # Factory call with an integer timestamp instead of a datetime.
    integer_stamped = dataframe_profile(frame, timestamp=103433)
    assert variance_mean(integer_stamped) == variance_mean(reference)