def test_cached_dataframe_returns_correct_type():
    """
    test_cached_dataframe_returns_correct_type : The object handed back by
    compute_cache.cached_dataframe must be a pandas DataFrame on both the
    first call and the second call against the same temporary CSV path.
    """

    def build_frame():
        # small fixed frame used as the "computed" result
        return pd.DataFrame({'x': [0, 1, 2]})

    with tempfile.NamedTemporaryFile(suffix='.csv') as tmp:
        csv_path = tmp.name
        # two passes: the first exercises the cold cache, the second the hot
        for _attempt in range(2):
            df = compute_cache.cached_dataframe(
                csv_path,
                compute_fn=build_frame)
            assert isinstance(df, pd.DataFrame), \
                "Expected DataFrame, got %s : %s" % (df, type(df))
def test_cached_dataframe_with_tempfile():
    """
    test_cached_dataframe_with_tempfile : A temporary file exists before
    calling into compute_cache.cached_dataframe but is empty, should be
    treated as if result has never been computed before (rather than trying
    to load the empty file).

    The test calls cached_dataframe twice with the same path and checks, on
    every call, that the first value of column 'x' is 1 and that the
    counter's compute function has been invoked exactly once.
    """
    counter = Counter()
    with tempfile.NamedTemporaryFile(suffix='.csv') as f:
        # call repeatedly to test hot and cold cache logic
        for _ in range(2):
            df = compute_cache.cached_dataframe(
                f.name, compute_fn=counter.increment_dataframe)
            # get counter value from inside of dataframe
            # (presumably increment_dataframe stores the running count in
            # column 'x' -- verify against the Counter helper).
            # Positional .iloc is used because the .ix indexer no longer
            # exists in current pandas releases.
            result = df['x'].iloc[0]
            assert result == 1, \
                "Expected result=1, got %s" % (result,)
            assert counter.count == 1, \
                "Expected compute_fn to be called once, got %s" % (
                    counter.count,)
def test_dataframe_path_must_be_csv():
    """
    test_dataframe_path_must_be_csv : compute_cache.cached_dataframe should
    raise an exception when the filename doesn't end with a .csv extension.

    The test fails if the call returns normally.
    """
    raised = False
    try:
        compute_cache.cached_dataframe(
            csv_path="tempfile_not_csv",
            # zero-argument callable, so that a TypeError from invoking
            # compute_fn can never be mistaken for the path being rejected
            compute_fn=lambda: pd.DataFrame({'x': []}))
    except Exception:
        # NOTE(review): the concrete exception type is not visible from this
        # test, so any Exception is accepted -- narrow once confirmed.
        raised = True
    assert raised, \
        "Expected cached_dataframe to raise for a path without .csv extension"