Exemplo n.º 1
0
def test_to_dask_with_partitions_use_json_query(engine):
    """Check ``to_dask`` with a JSON query body split over two partitions.

    The source is built from a JSON document containing a ``range`` query on
    ``score`` and ``npartitions=2``. The test asserts that the dask dataframe
    reports two partitions, exposes the same column names as the reference
    ``df``, and that after ``compute()`` it has as many rows as ``df`` with
    every ``df`` record present in the result.

    ``engine`` is not used in the body; presumably it is a pytest fixture
    requested for its setup side effects -- confirm against the test module.
    """
    query_string = '''
        {
            "query": {
                "range" : {
                    "score" : {
                        "gte" : 0,
                        "lte" : 150,
                        "boost" : 2.0
                    }
                }
            }
        }
    '''

    source = ElasticSearchTableSource(query_string, npartitions=2, **CONNECT)
    dd = source.to_dask()
    assert dd.npartitions == 2
    assert set(dd.columns) == set(df.columns)

    out = dd.compute()

    assert len(out) == len(df)

    # Convert the result once; doing it inside the comprehension would
    # rebuild the full list of records for every expected row.
    out_records = out.to_dict(orient='records')
    missing = [record for record in df.to_dict(orient='records')
               if record not in out_records]
    # Row order is not compared here, only membership.
    assert not missing, missing
Exemplo n.º 2
0
def test_to_dask(engine):
    """Check ``to_dask`` on a single-partition source sorted by ``rank``.

    Builds the source from the query string ``'score:[0 TO 150]'`` with a
    ``sort`` query argument and no explicit ``npartitions``, then asserts the
    dask dataframe has one partition, the same column names as the reference
    ``df``, and that the computed frame (columns reordered to match ``df``)
    equals ``df``.

    ``engine`` is not used in the body; presumably it is a pytest fixture
    requested for its setup side effects -- confirm against the test module.
    """
    query_args = {"sort": 'rank'}
    source = ElasticSearchTableSource(
        'score:[0 TO 150]', qargs=query_args, **CONNECT)

    dask_frame = source.to_dask()
    assert dask_frame.npartitions == 1
    assert set(dask_frame.columns) == set(df.columns)

    computed = dask_frame.compute()
    # Select columns in the reference order before the strict equality check.
    reordered = computed[df.columns]
    assert reordered.equals(df)
Exemplo n.º 3
0
def test_to_dask_empty_shard(engine):
    """Check ``to_dask`` when five partitions are requested.

    Builds the source from the query string ``'score:[0 TO 150]'`` sorted by
    ``rank`` with ``npartitions=5``. The test asserts that the dask dataframe
    reports five partitions, exposes the same column names as the reference
    ``df``, and that after ``compute()`` it has as many rows as ``df`` with
    every ``df`` record present in the result.

    NOTE(review): going by the test name, some of the five partitions are
    presumably expected to be empty -- confirm against the fixture data.
    ``engine`` is not used in the body; presumably it is a pytest fixture
    requested for its setup side effects.
    """
    source = ElasticSearchTableSource('score:[0 TO 150]', npartitions=5, qargs={
        "sort": 'rank'}, **CONNECT)
    dd = source.to_dask()
    assert dd.npartitions == 5
    assert set(dd.columns) == set(df.columns)

    out = dd.compute()

    assert len(out) == len(df)

    # Convert the result once; doing it inside the comprehension would
    # rebuild the full list of records for every expected row.
    out_records = out.to_dict(orient='records')
    missing = [record for record in df.to_dict(orient='records')
               if record not in out_records]
    # Row order is not compared here, only membership.
    assert not missing, missing