def test_read(engine):
    """Read the full score range and check every expected record is present.

    The query carries no sort clause, so the row order of the result is not
    relied on: each record of the reference frame ``df`` (defined outside
    this block) only has to appear somewhere in the output.  This would be
    easier with a full query with sorting.

    ``engine`` is not used in the body; presumably it is the pytest fixture
    that provides the Elasticsearch instance -- confirm against the fixture
    definition.
    """
    p = ESTablePlugin()
    source = p.open('score:[0 TO 150]', **CONNECT)
    out = source.read()

    # Convert the result to records once rather than once per expected row.
    actual_records = out.to_dict(orient='records')
    assert all(d in actual_records for d in df.to_dict(orient='records'))
def test_discover(engine):
    """Check the metadata returned by ``discover()`` on a freshly opened source.

    Asserts the reported dtypes match those of the reference frame ``df``,
    that the shape is ``(None, 3)`` and that there is a single partition.
    """
    plugin = ESTablePlugin()
    src = plugin.open('score:[30 TO 150]', **CONNECT)
    meta = src.discover()

    # NB: ES results come as dicts, so column order can vary; compare the
    # column -> dtype mappings instead of the ordered dtype listings.
    found_dtypes = meta['dtype'].dtypes.to_dict()
    expected_dtypes = df[:0].dtypes.to_dict()
    assert found_dtypes == expected_dtypes

    assert meta['shape'] == (None, 3)
    assert meta['npartitions'] == 1
def test_close(engine):
    """A source that has been closed can still be read afterwards."""
    plugin = ESTablePlugin()
    query_args = {"sort": 'rank'}
    src = plugin.open('score:[0 TO 150]', qargs=query_args, **CONNECT)

    src.close()

    # Can reopen after close
    result = src.read()
    assert result[df.columns].equals(df)
def test_pickle(engine):
    """A source round-tripped through pickle reads the same data as ``df``."""
    plugin = ESTablePlugin()
    src = plugin.open(
        'score:[0 TO 150]',
        qargs={"sort": 'rank'},
        **CONNECT
    )

    # Serialize and immediately restore; only the restored copy is read.
    restored = pickle.loads(pickle.dumps(src))

    result = restored.read()
    assert result[df.columns].equals(df)
def test_to_dask(engine):
    """``to_dask()`` yields a one-partition frame that computes to ``df``."""
    plugin = ESTablePlugin()
    src = plugin.open(
        'score:[0 TO 150]',
        qargs={"sort": 'rank'},
        **CONNECT
    )

    lazy_frame = src.to_dask()
    assert lazy_frame.npartitions == 1
    # Column order is not compared at this stage, only the set of names.
    assert set(lazy_frame.columns) == set(df.columns)

    computed = lazy_frame.compute()
    assert computed[df.columns].equals(df)
def test_read_chunked(engine):
    """Concatenating the chunks from ``read_chunked()`` reproduces ``df``."""
    plugin = ESTablePlugin()
    # drop in a test of sort - only works on numerical field without work
    src = plugin.open(
        'score:[0 TO 150]',
        qargs={"sort": 'rank'},
        **CONNECT
    )

    chunks = list(src.read_chunked())
    combined = pd.concat(chunks)

    # with sort, comparison is simpler
    assert combined[df.columns].equals(df)
def test_discover_after_read(engine):
    """Check ``discover()`` both before and after the source has been read.

    Before the read the reported shape is asserted to be ``(None, 3)``;
    after the read it is asserted to be ``(4, 3)``.  The dtypes and the
    partition count are asserted to be the same in both calls.

    ``engine`` is not used in the body; presumably it is the pytest fixture
    that provides the Elasticsearch instance -- confirm against the fixture
    definition.
    """
    p = ESTablePlugin()
    source = p.open('score:[0 TO 150]', **CONNECT)
    # Dtypes of the reference frame, taken from an empty slice of it.
    expected_dtypes = df[:0].dtypes.to_dict()

    info = source.discover()
    assert info['dtype'].dtypes.to_dict() == expected_dtypes
    assert info['shape'] == (None, 3)
    assert info['npartitions'] == 1

    out = source.read()
    # The query is unsorted, so only membership of each expected record is
    # checked.  Convert the result once rather than once per expected row.
    actual_records = out.to_dict(orient='records')
    assert all(d in actual_records for d in df.to_dict(orient='records'))

    info = source.discover()
    assert info['dtype'].dtypes.to_dict() == expected_dtypes
    assert info['shape'] == (4, 3)
    assert info['npartitions'] == 1
def test_open(engine):
    """An opened source reports the expected container and no description.

    The source is also passed to ``verify_datasource_interface`` (defined
    outside this block) for the interface check.
    """
    source = ESTablePlugin().open('score:[30 TO 150]', **CONNECT)

    assert source.container == 'dataframe'
    assert source.description is None
    verify_datasource_interface(source)