def test_pre_compute_with_projection_projects_on_data_frames():
    """Pre-computing a distinct two-column projection keeps only those columns.

    Builds the expression from the CSV's ``dshape`` and inspects the first
    chunk obtained through ``get_chunk()`` on the pre-computed result.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', source.dshape)
    projected = sym[['sepal_length', 'sepal_width']].distinct()
    # NOTE(review): comfortable_memory=10 presumably forces the chunked
    # code path for this file -- confirm against pre_compute.
    reader = pre_compute(projected, source, comfortable_memory=10)
    chunk = reader.get_chunk()
    assert set(chunk.columns) == {'sepal_length', 'sepal_width'}
def test_pre_compute_calls_lean_projection():
    """Sorting by one column then selecting another loads just those two.

    The expression sorts on ``sepal_length`` and takes ``species``; the first
    chunk from ``get_chunk()`` must contain exactly those two columns.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', source.dshape)
    sorted_species = sym.sort('sepal_length').species
    reader = pre_compute(sorted_species, source, comfortable_memory=10)
    chunk = reader.get_chunk()
    assert set(chunk.columns) == {'sepal_length', 'species'}
def test_pre_compute_calls_lean_projection():
    """Sorting by one column then selecting another loads just those two.

    The symbol's datashape comes from ``discover``; the first element of the
    pre-computed result must expose only ``sepal_length`` and ``species``.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    sorted_species = sym.sort('sepal_length').species
    chunked = pre_compute(sorted_species, source, comfortable_memory=10)
    head_chunk = first(chunked)
    assert set(head_chunk.columns) == {'sepal_length', 'species'}
def test_pre_compute_with_projection_projects_on_data_frames():
    """Pre-computing a distinct two-column projection keeps only those columns.

    The symbol's datashape comes from ``discover``; the first element of the
    pre-computed result is checked for its column names.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    projected = sym[['sepal_length', 'sepal_width']].distinct()
    chunked = pre_compute(projected, source, comfortable_memory=10)
    head_chunk = first(chunked)
    assert set(head_chunk.columns) == {'sepal_length', 'sepal_width'}
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """``head()`` of a column, pre-computed with a tiny budget, is an Iterator.

    The symbol's datashape is obtained via ``discover``.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    # NOTE(review): comfortable_memory=10 presumably makes the CSV count as
    # "large" -- confirm against pre_compute.
    computed = pre_compute(sym.species.head(), source, comfortable_memory=10)
    assert isinstance(computed, Iterator)
def test_pre_compute_on_large_csv_gives_chunked_reader():
    """A column pre-computed with a tiny memory budget comes back chunked.

    Accepts either ``chunks(pd.DataFrame)`` or a pandas ``TextFileReader``.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    computed = pre_compute(sym.species, source, comfortable_memory=10)
    accepted_types = (chunks(pd.DataFrame), pd.io.parsers.TextFileReader)
    assert isinstance(computed, accepted_types)
def test_pre_compute_on_small_csv_gives_dataframe():
    """With the default memory budget, pre_compute returns a pandas object.

    Either a ``Series`` or a ``DataFrame`` is accepted for the ``species``
    column expression.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    computed = pre_compute(sym.species, source)
    assert isinstance(computed, (Series, DataFrame))
def test_pre_compute_on_small_csv_gives_dataframe():
    """With the default memory budget, pre_compute returns a ``DataFrame``.

    The symbol is built from the CSV's own ``dshape`` attribute.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', source.dshape)
    computed = pre_compute(sym.species, source)
    assert isinstance(computed, DataFrame)
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """Pre-computing ``species.head()`` under a tiny budget gives an Iterator.

    The datashape for the symbol is discovered from the CSV resource.
    """
    source = CSV(example("iris.csv"))
    sym = symbol("s", discover(source))
    head_expr = sym.species.head()
    computed = pre_compute(head_expr, source, comfortable_memory=10)
    assert isinstance(computed, Iterator)
def test_pre_compute_on_large_csv_gives_chunked_reader():
    """A column pre-computed with a tiny budget is a pandas ``TextFileReader``.

    The symbol is built from the CSV's own ``dshape`` attribute.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', source.dshape)
    computed = pre_compute(sym.species, source, comfortable_memory=10)
    assert isinstance(computed, pandas.io.parsers.TextFileReader)
def test_pre_compute_calls_lean_projection():
    """Only the sort key and the selected column appear in the first chunk.

    Sorts on ``sepal_length``, selects ``species``, pre-computes with
    ``comfortable_memory=10`` and checks the columns of the first element.
    """
    source = CSV(example("iris.csv"))
    sym = symbol("s", discover(source))
    expr = sym.sort("sepal_length").species
    chunked = pre_compute(expr, source, comfortable_memory=10)
    head_chunk = first(chunked)
    assert set(head_chunk.columns) == {"sepal_length", "species"}
def test_pre_compute_with_projection_projects_on_data_frames():
    """Only the projected columns appear in the first pre-computed chunk.

    Projects ``sepal_length`` and ``sepal_width``, applies ``distinct()``,
    pre-computes with ``comfortable_memory=10`` and checks the first element.
    """
    source = CSV(example("iris.csv"))
    sym = symbol("s", discover(source))
    expr = sym[["sepal_length", "sepal_width"]].distinct()
    chunked = pre_compute(expr, source, comfortable_memory=10)
    head_chunk = first(chunked)
    assert set(head_chunk.columns) == {"sepal_length", "sepal_width"}
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """``head()`` of a column, pre-computed with a tiny budget, is an Iterator.

    The symbol is built from the CSV's own ``dshape`` attribute.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', source.dshape)
    head_expr = sym.species.head()
    computed = pre_compute(head_expr, source, comfortable_memory=10)
    assert isinstance(computed, Iterator)
def test_pre_compute_on_large_csv_gives_dask_reader():
    """A column pre-computed with a tiny budget is a dask ``DataFrame``.

    The datashape for the symbol is discovered from the CSV resource.
    """
    source = CSV(example('iris.csv'))
    sym = symbol('s', discover(source))
    # NOTE(review): comfortable_memory=10 presumably makes the CSV count as
    # "large" -- confirm against pre_compute.
    computed = pre_compute(sym.species, source, comfortable_memory=10)
    assert isinstance(computed, dask.dataframe.DataFrame)