def test_to_dask_with_partitions_use_json_query(engine):
    """Check ``to_dask`` with a JSON query string split over two partitions.

    The source is built from a JSON range query on ``score`` with
    ``npartitions=2``.  The resulting dask dataframe must report two
    partitions, expose the same column set as the module-level ``df``,
    and, once computed, have the same length as ``df`` and contain every
    one of its records.  Row order is not checked.

    Args:
        engine: pytest fixture requested for its setup; it is not used
            directly in the body (presumably it provides the running
            Elasticsearch instance -- confirm against the fixture).
    """
    query_string = ''' { "query": { "range" : { "score" : { "gte" : 0, "lte" : 150, "boost" : 2.0 } } } } '''
    source = ElasticSearchTableSource(query_string, npartitions=2, **CONNECT)
    dd = source.to_dask()
    assert dd.npartitions == 2
    assert set(dd.columns) == set(df.columns)

    out = dd.compute()
    assert len(out) == len(df)

    # Convert the computed frame to records once, instead of once per
    # expected record, and collect the absent records so that a failure
    # shows exactly which rows were not returned.
    out_records = out.to_dict(orient='records')
    missing = [
        record
        for record in df.to_dict(orient='records')
        if record not in out_records
    ]
    assert not missing
def test_to_dask(engine):
    """Check ``to_dask`` for a sorted query-string search in one partition.

    The source is built from the query ``score:[0 TO 150]`` with the
    query argument ``sort='rank'`` and no explicit partition count.  The
    resulting dask dataframe must report a single partition, expose the
    same column set as the module-level ``df``, and, once computed and
    reordered to ``df``'s column order, be equal to ``df``.

    Args:
        engine: pytest fixture requested for its setup; it is not used
            directly in the body (presumably it provides the running
            Elasticsearch instance -- confirm against the fixture).
    """
    sort_args = {"sort": 'rank'}
    es_source = ElasticSearchTableSource(
        'score:[0 TO 150]',
        qargs=sort_args,
        **CONNECT
    )
    lazy_frame = es_source.to_dask()

    assert lazy_frame.npartitions == 1
    assert set(lazy_frame.columns) == set(df.columns)

    computed = lazy_frame.compute()
    # Select columns in df's order before comparing the two frames.
    reordered = computed[df.columns]
    assert reordered.equals(df)
def test_to_dask_empty_shard(engine):
    """Check ``to_dask`` when the query is split over five partitions.

    The source is built from the query ``score:[0 TO 150]`` with
    ``npartitions=5`` and the query argument ``sort='rank'``.  The
    resulting dask dataframe must report five partitions, expose the
    same column set as the module-level ``df``, and, once computed, have
    the same length as ``df`` and contain every one of its records.  Row
    order is not checked.

    NOTE(review): the test name suggests that some of the five partitions
    come back empty for the fixture data -- confirm against the fixture.

    Args:
        engine: pytest fixture requested for its setup; it is not used
            directly in the body (presumably it provides the running
            Elasticsearch instance -- confirm against the fixture).
    """
    source = ElasticSearchTableSource(
        'score:[0 TO 150]',
        npartitions=5,
        qargs={"sort": 'rank'},
        **CONNECT
    )
    dd = source.to_dask()
    assert dd.npartitions == 5
    assert set(dd.columns) == set(df.columns)

    out = dd.compute()
    assert len(out) == len(df)

    # Convert the computed frame to records once, instead of once per
    # expected record, and collect the absent records so that a failure
    # shows exactly which rows were not returned.
    out_records = out.to_dict(orient='records')
    missing = [
        record
        for record in df.to_dict(orient='records')
        if record not in out_records
    ]
    assert not missing