# Example 1
def test_to_dask_with_partitions_use_json_query(engine):
    """A full JSON query body with two partitions yields every expected row."""
    query_string = '''
        {
            "query": {
                "range" : {
                    "score" : {
                        "gte" : 0,
                        "lte" : 150,
                        "boost" : 2.0
                    }
                }
            }
        }
    '''

    src = ElasticSearchTableSource(query_string, npartitions=2, **CONNECT)
    ddf = src.to_dask()
    assert ddf.npartitions == 2
    assert set(ddf.columns) == set(df.columns)

    result = ddf.compute()

    assert len(result) == len(df)
    # Row order is not guaranteed across partitions; compare as record sets.
    actual = result.to_dict(orient='records')
    assert all(rec in actual for rec in df.to_dict(orient='records'))
# Example 2
def test_read(engine):
    """read() returns all expected records (order not guaranteed)."""
    src = ElasticSearchTableSource('score:[0 TO 150]', **CONNECT)
    result = src.read()
    # this would be easier with a full query with sorting
    records = result.to_dict(orient='records')
    assert all(rec in records for rec in df.to_dict(orient='records'))
# Example 3
def test_discover(engine):
    """discover() reports dtypes, an unknown row count, and one partition."""
    src = ElasticSearchTableSource('score:[30 TO 150]', **CONNECT)
    info = src.discover()
    # NB: ES results come as dicts, so column order can vary
    expected_dtypes = {name: str(dtype)
                       for name, dtype in df[:0].dtypes.to_dict().items()}
    assert info['dtype'] == expected_dtypes
    assert info['shape'] == (None, 3)
    assert info['npartitions'] == 1
# Example 4
def test_close(engine):
    """A closed source can be transparently reopened by read()."""
    src = ElasticSearchTableSource('score:[0 TO 150]',
                                   qargs={"sort": 'rank'},
                                   **CONNECT)
    src.close()

    # Can reopen after close
    result = src.read()

    assert result[df.columns].equals(df)
# Example 5
def test_to_dask(engine):
    """Default to_dask gives a single partition whose data matches df."""
    src = ElasticSearchTableSource('score:[0 TO 150]',
                                   qargs={"sort": 'rank'},
                                   **CONNECT)

    ddf = src.to_dask()
    assert ddf.npartitions == 1
    assert set(ddf.columns) == set(df.columns)

    result = ddf.compute()
    assert result[df.columns].equals(df)
# Example 6
def test_to_dask_empty_shard(engine):
    """More partitions than rows: empty shards are tolerated, data is intact."""
    src = ElasticSearchTableSource('score:[0 TO 150]', npartitions=5, qargs={
        "sort": 'rank'}, **CONNECT)
    ddf = src.to_dask()
    assert ddf.npartitions == 5
    assert set(ddf.columns) == set(df.columns)

    result = ddf.compute()

    assert len(result) == len(df)
    # Compare as record sets; partitioning may reorder rows.
    records = result.to_dict(orient='records')
    assert all(rec in records for rec in df.to_dict(orient='records'))
# Example 7
def test_discover_after_read(engine):
    """discover() reports an unknown row count until a read() resolves it."""
    src = ElasticSearchTableSource('score:[0 TO 150]', **CONNECT)
    expected_dtypes = {name: str(dtype)
                       for name, dtype in df.dtypes.to_dict().items()}

    info = src.discover()
    assert info['dtype'] == expected_dtypes
    assert info['shape'] == (None, 3)
    assert info['npartitions'] == 1

    records = src.read().to_dict(orient='records')
    assert all(rec in records for rec in df.to_dict(orient='records'))

    # After reading, the source knows its exact row count.
    info = src.discover()
    assert info['dtype'] == expected_dtypes
    assert info['shape'] == (4, 3)
    assert info['npartitions'] == 1
# Example 8
def test_pickle(engine):
    """A source survives a pickle round-trip and reads identical data."""
    src = ElasticSearchTableSource('score:[0 TO 150]',
                                   qargs={"sort": 'rank'},
                                   **CONNECT)

    clone = pickle.loads(pickle.dumps(src))

    result = clone.read()

    assert result[df.columns].equals(df)
# Example 9
def test_open(engine):
    """A freshly opened source exposes the standard datasource interface."""
    src = ElasticSearchTableSource('score:[30 TO 150]', **CONNECT)
    assert src.container == 'dataframe'
    assert src.description is None
    verify_datasource_interface(src)
# Example 10
def test_open_with_two_partitions(engine):
    """npartitions given at open time is recorded on the source."""
    src = ElasticSearchTableSource('score:[30 TO 150]', npartitions=2, **CONNECT)
    assert src.container == 'dataframe'
    assert src.description is None
    assert src.npartitions == 2
    verify_datasource_interface(src)