Example #1
def test_can_trivially_create_sqlite_table():
    pytest.importorskip('sqlalchemy')
    Data('sqlite:///'+example('iris.db')+'::iris')

    # in context
    with Data('sqlite:///'+example('iris.db')+'::iris') as d:
        assert d is not None
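
The 'sqlite:///' + example('iris.db') + '::iris' string above follows the odo/blaze resource-URI convention: the part before '::' names the database and the part after names the dataset (here, a table) inside it. A minimal sketch of that split; the helper name is ours, not a library function:

# Hypothetical helper (not part of blaze or odo): split a resource URI of
# the form 'sqlite:///path/to/iris.db::iris' into database and dataset.
def split_resource_uri(uri):
    db_uri, sep, dataset = uri.rpartition('::')
    if not sep:
        # No '::' present: the URI names a whole database, not one table.
        return uri, None
    return db_uri, dataset

assert split_resource_uri('sqlite:///iris.db::iris') == ('sqlite:///iris.db', 'iris')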
Example #2
def test_client_add_dataset():
    client.requests = test_add  # OMG more monkey patching
    ec = Client('localhost:6363')
    ec.add('iris', example('iris.csv'))
    assert 'iris' in ec.dshape.measure.dict
    iris_data = bz_data(example('iris.csv'))
    assert ec.dshape.measure.dict['iris'] == iris_data.dshape
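
The bare 'client.requests = test_add' assignment (which the test's own comment flags as monkey patching) leaks the patch to later tests in the module. A sketch of the same test using pytest's built-in monkeypatch fixture, which restores the attribute automatically; it assumes the surrounding module's names (client, Client, example, test_add):

def test_client_add_dataset_patched(monkeypatch):
    # monkeypatch.setattr undoes the patch when the test finishes,
    # unlike the bare assignment above.
    monkeypatch.setattr(client, 'requests', test_add)
    ec = Client('localhost:6363')
    ec.add('iris', example('iris.csv'))
    assert 'iris' in ec.dshape.measure.dict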
Example #3
def test_client_add_dataset_with_args():
    client.requests = test_add  # OMG more monkey patching
    ec = Client('localhost:6363')
    ec.add('teams', 'sqlite:///' + example('teams.db'), 'teams',
           primary_key='teamID')
    assert 'teams' in ec.dshape.measure.dict
    teams_data = bz_data('sqlite:///' + example('teams.db') + '::teams')
    assert ec.dshape.measure.dict['teams'] == teams_data.dshape
Example #4
def test_add_data_to_empty_server(empty_server, serial):
    # add data
    with temp_server() as test:
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = empty_server.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        response2 = empty_server.get('/datashape')
        expected2 = str(discover({'iris': resource(iris_path)}))
        assert response2.data.decode('utf-8') == expected2

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = empty_server.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
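
The serialize-then-POST-with-matching-headers step above repeats in almost every server test below. A hedged helper that factors it out, using only names these tests already use (mimetype, a Flask test client, and a serial format object):

def post_payload(test_client, serial, route, payload):
    # Serialize the payload and POST it with the matching content-type
    # header, exactly as the inline calls in these tests do.
    return test_client.post(route,
                            headers=mimetype(serial),
                            data=serial.dumps(payload))

# e.g. response = post_payload(empty_server, serial, '/add', {'iris': iris_path})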
Example #5
def test_add_data_to_server(temp_add_server, serial):
    # add data
    iris_path = example('iris.csv')
    blob = serial.dumps({'iris': iris_path})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert 'CREATED' in response1.status
    assert response1.status_code == RC.CREATED

    # check for expected server datashape
    response2 = temp_add_server.get('/datashape')
    expected2 = discover({'iris': data(iris_path)})
    response_dshape = datashape.dshape(response2.data.decode('utf-8'))
    assert_dshape_equal(response_dshape.measure.dict['iris'],
                        expected2.measure.dict['iris'])

    # compute on added data
    t = data({'iris': data(iris_path)})
    expr = t.iris.petal_length.sum()

    response3 = temp_add_server.post('/compute',
                                     data=serial.dumps({'expr': to_tree(expr)}),
                                     headers=mimetype(serial))

    result3 = serial.data_loads(serial.loads(response3.data)['data'])
    expected3 = compute(expr, {'iris': data(iris_path)})
    assert result3 == expected3
Example #6
def test_pre_compute_calls_lean_projection():
    csv = CSV(example('iris.csv'))
    s = symbol('s', csv.dshape)
    result = pre_compute(s.sort('sepal_length').species,
                         csv, comfortable_memory=10)
    assert set(result.get_chunk().columns) == \
            set(['sepal_length', 'species'])
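
The "lean projection" under test means pre_compute reads only the columns the expression actually touches. Outside blaze, plain pandas gives the same effect with usecols and chunksize; an illustrative sketch, reusing the example() helper from above:

import pandas as pd

# Read only the two needed columns, in chunks, instead of the whole file.
reader = pd.read_csv(example('iris.csv'),
                     usecols=['sepal_length', 'species'],
                     chunksize=50)
first_chunk = reader.get_chunk()
assert set(first_chunk.columns) == {'sepal_length', 'species'}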
Example #7
def test_add_expanded_payload_has_effect(temp_add_server, serial):
    # Ensure that the expanded payload format actually passes the arguments
    # through to the resource constructor
    iris_path = example('iris-latin1.tsv')
    csv_kwargs = {'delimiter': '\t', 'encoding': 'iso-8859-1'}
    blob = serial.dumps({'iris': {'source': iris_path,
                                  'kwargs': csv_kwargs}})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert 'CREATED' in response1.status
    assert response1.status_code == RC.CREATED

    # check for expected server datashape
    response2 = temp_add_server.get('/datashape')
    expected2 = discover({'iris': data(iris_path, **csv_kwargs)})
    response_dshape = datashape.dshape(response2.data.decode('utf-8'))
    assert_dshape_equal(response_dshape.measure.dict['iris'],
                        expected2.measure.dict['iris'])

    # compute on added data
    t = data({'iris': data(iris_path, **csv_kwargs)})
    expr = t.iris.petal_length.sum()

    response3 = temp_add_server.post('/compute',
                                     data=serial.dumps({'expr': to_tree(expr)}),
                                     headers=mimetype(serial))

    result3 = serial.data_loads(serial.loads(response3.data)['data'])
    expected3 = compute(expr, {'iris': data(iris_path, **csv_kwargs)})
    assert result3 == expected3
Example #8
def test_add_data_to_server(temp_add_server, serial):
    # add data
    iris_path = example('iris.csv')
    blob = serial.dumps({'iris': iris_path})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert 'CREATED' in response1.status
    assert response1.status_code == RC.CREATED

    # check for expected server datashape
    response2 = temp_add_server.get('/datashape')
    expected2 = discover({'iris': data(iris_path)})
    response_dshape = datashape.dshape(response2.data.decode('utf-8'))
    assert_dshape_equal(response_dshape.measure.dict['iris'],
                        expected2.measure.dict['iris'])

    # compute on added data
    client.requests = temp_add_server
    t = data(bz.Client('localhost:6363'))
    expr = t.iris.petal_length.sum()

    response3 = temp_add_server.post('/compute',
                                     data=serial.dumps({'expr':
                                                        to_tree(expr)}),
                                     headers=mimetype(serial))

    result3 = serial.data_loads(serial.loads(response3.data)['data'])
    expected3 = compute(expr, {'iris': data(iris_path)})
    assert result3 == expected3
Example #9
def test_into_ColumnDataSource_pytables():
    pytest.importorskip('bokeh')
    from bokeh.objects import ColumnDataSource

    pyt = PyTables(example('accounts.h5'), '/accounts')
    cds = into(ColumnDataSource, pyt)
    assert all(col in cds.column_names for col in ('balance', 'id', 'name'))
Example #10
def test_add_expanded_payload_has_effect(temp_add_server, serial):
    # Ensure that the expanded payload format actually passes the arguments
    # through to the resource constructor
    iris_path = example('iris-latin1.tsv')
    csv_kwargs = {'delimiter': '\t', 'encoding': 'iso-8859-1'}
    blob = serial.dumps({'iris': {'source': iris_path, 'kwargs': csv_kwargs}})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert 'CREATED' in response1.status
    assert response1.status_code == RC.CREATED

    # check for expected server datashape
    response2 = temp_add_server.get('/datashape')
    expected2 = discover({'iris': data(iris_path, **csv_kwargs)})
    response_dshape = datashape.dshape(response2.data.decode('utf-8'))
    assert_dshape_equal(response_dshape.measure.dict['iris'],
                        expected2.measure.dict['iris'])

    # compute on added data
    client.requests = temp_add_server
    t = data(bz.Client('localhost:6363'))
    expr = t.iris.petal_length.sum()

    response3 = temp_add_server.post('/compute',
                                     data=serial.dumps({'expr':
                                                        to_tree(expr)}),
                                     headers=mimetype(serial))

    result3 = serial.data_loads(serial.loads(response3.data)['data'])
    expected3 = compute(expr, {'iris': data(iris_path, **csv_kwargs)})
    assert result3 == expected3
Example #11
def test_pre_compute_calls_lean_projection():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    result = pre_compute(s.sort('sepal_length').species,
                         csv, comfortable_memory=10)
    assert set(first(result).columns) == \
            set(['sepal_length', 'species'])
Example #12
def test_pre_compute_with_projection_projects_on_data_frames():
    csv = CSV(example('iris.csv'))
    s = symbol('s', csv.dshape)
    result = pre_compute(s[['sepal_length', 'sepal_width']].distinct(),
                         csv, comfortable_memory=10)
    assert set(result.get_chunk().columns) == \
            set(['sepal_length', 'sepal_width'])
Example #13
def test_cant_add_data_to_server(iris_server, serial):
    # try adding more data to server
    iris_path = example('iris.csv')
    blob = serial.dumps({'iris': iris_path})
    response1 = iris_server.post('/add',
                                 headers=mimetype(serial),
                                 data=blob)
    assert response1.status_code == RC.UNPROCESSABLE_ENTITY
Example #14
def test_add_default_not_allowed(temp_server, serial):
    iris_path = example('iris.csv')
    blob = serial.dumps({'iris': iris_path})
    response1 = temp_server.post('/add',
                                 headers=mimetype(serial),
                                 data=blob)
    assert 'NOT FOUND' in response1.status
    assert response1.status_code == RC.NOT_FOUND
Example #15
def test_repr_hdma():
    csv = CSV(example('hmda-small.csv'))
    t = TableSymbol('hmda', csv.schema)

    assert compute(t.head(), csv)

    columns = ['action_taken_name', 'agency_abbr', 'applicant_ethnicity_name']
    assert compute(t[columns].head(), csv)
Example #16
def test_pre_compute_with_projection_projects_on_data_frames():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    result = pre_compute(s[['sepal_length', 'sepal_width']].distinct(),
                         csv,
                         comfortable_memory=10)
    assert set(first(result).columns) == \
            set(['sepal_length', 'sepal_width'])
Example #17
def test_map_called_on_data_star():
    r = data(example('accounts_*.csv'))
    s = symbol('s', discover(r))
    flag[0] = False
    a = compute(s.count(), r)
    b = compute(s.count(), r, map=mymap)
    assert a == b
    assert flag[0]
Example #18
def test_map_called_on_resource_star():
    r = resource(example('accounts_*.csv'))
    s = symbol('s', discover(r))
    flag[0] = False
    a = compute(s.count(), r)
    b = compute(s.count(), r, map=mymap)
    assert a == b
    assert flag[0]
Example #19
def test_add_expanded_payload(temp_add_server, serial):
    # Ensure that the expanded payload format is accepted by the server
    iris_path = example('iris.csv')
    blob = serial.dumps({'iris': {'source': iris_path,
                                  'kwargs': {'delimiter': ','}}})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert 'CREATED' in response1.status
    assert response1.status_code == RC.CREATED
Example #20
def data():
    try:
        t = odo(example('nyc.csv'),
                'mysql+pymysql://%s@localhost/test::nyc' % getuser())
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t.bind
        finally:
            drop(t)
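
The fixture pattern here (build the resource, pytest.skip on OperationalError, otherwise yield and drop in a finally block) recurs in the next several examples. A generic sketch of the same idea without odo, assuming only SQLAlchemy and a placeholder connection URI:

import pytest
import sqlalchemy as sa

@pytest.fixture
def db_connection():
    try:
        # create_engine is lazy; connect() is what actually raises
        # OperationalError when the server is unreachable.
        conn = sa.create_engine('postgresql://postgres@localhost/test').connect()
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield conn
        finally:
            conn.close()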
Example #21
def nyc():
    try:
        t = odo(example('nyc.csv'),
                'postgresql://postgres@localhost/test::nyc')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)
Example #22
def test_into_ctable_pytables():
    from bcolz import ctable
    tble = PyTables(example('accounts.h5'), datapath='/accounts')
    ct = into(ctable, tble)
    ctn = len(ct)
    tbn = len(tble)
    ctf, ctl = ct[0], ct[-1]
    tbf, tbl = tble[0], tble[-1]
    tble._v_file.close()
    assert ctn == tbn
    assert ctf == tbf
    assert ctl == tbl
Example #23
def nyc_csv(pg_ip):
    try:
        t = odo(
            example('nyc.csv'),
            'postgresql://postgres@{}/test::nyc'.format(pg_ip),
        )
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)
Example #24
def nyc(pg_ip):
    # odoing csv -> pandas -> postgres is more robust, as it doesn't require
    # the postgres server to be on the same filesystem as the csv file.
    nyc_pd = odo(example('nyc.csv'), pd.DataFrame)
    try:
        t = odo(nyc_pd, 'postgresql://postgres@{}/test::nyc'.format(pg_ip))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)
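
The comment in this fixture explains the design choice: loading the CSV into pandas first means the postgres server never needs filesystem access to the file. A plain-pandas sketch of the same route, with a placeholder connection URI:

import pandas as pd
import sqlalchemy as sa

df = pd.read_csv(example('nyc.csv'))  # read locally, on the client machine
engine = sa.create_engine('postgresql://postgres@localhost/test')
# to_sql ships rows over the connection, so the server never touches nyc.csv.
df.to_sql('nyc', engine, if_exists='fail', index=False)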
Example #25
def test_pre_compute_on_multiple_datasets_is_selective():
    from odo import CSV
    from blaze import Data
    from blaze.cached import CachedDataset

    df = pd.DataFrame(
        [[1, 'Alice', 100], [2, 'Bob', -200], [3, 'Charlie', 300],
         [4, 'Denis', 400], [5, 'Edith', -500]],
        columns=['id', 'name', 'amount'])
    iris = CSV(example('iris.csv'))
    dset = CachedDataset({'df': df, 'iris': iris})

    d = Data(dset)
    assert str(compute(d.df.amount)) == str(df.amount)
Example #26
def test_pandas_dynd(data, schema):
    arr = nd.array(data, dtype=schema)

    result = into(DataFrame, arr)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    nda = nd.array([[1,2,3], [4,5,6], [7,8,9]])
    csv = CSV(example('accounts.csv'))
    df_csv = into(DataFrame, csv)
    df_nd = into(df_csv, nda)
    df_no_names = into(DataFrame, nda)

    assert list(df_nd.columns) == list(df_csv.columns)
    assert list(df_no_names.columns) == [0,1,2]
Example #27
def test_pandas_dynd():
    arr = nd.array(data, dtype=schema)

    result = into(DataFrame, arr)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    nda = nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    csv = CSV(example('accounts.csv'))
    df_csv = into(DataFrame, csv)
    df_nd = into(df_csv, nda)
    df_no_names = into(DataFrame, nda)

    assert list(df_nd.columns) == list(df_csv.columns)
    assert list(df_no_names.columns) == [0, 1, 2]
Example #28
def nyc():
    with open(example('nyc.csv'), 'rb') as f:
        raw = f.read()
        with tmpfile('.csv') as name:
            with open(name, 'wb') as g:
                g.write(raw)
            try:
                t = odo(name, 'postgresql://postgres@localhost/test::nyc')
            except sa.exc.OperationalError as e:
                pytest.skip(str(e))
            else:
                try:
                    yield t
                finally:
                    drop(t)
Example #29
def test_add_errors(temp_add_server, serial):
    pre_datashape = datashape.dshape(
        temp_add_server.get('/datashape').data.decode('utf-8'))
    bunk_path = example('bunk.csv')
    blob = serial.dumps({'bunk': bunk_path})
    response1 = temp_add_server.post('/add',
                                     headers=mimetype(serial),
                                     data=blob)
    assert response1.status_code == RC.UNPROCESSABLE_ENTITY

    # Test that the datashape of the server is accessible and unchanged after
    # trying to add a non-existent dataset.
    response2 = temp_add_server.get('/datashape')
    assert response2.status_code == RC.OK
    response_dshape = datashape.dshape(response2.data.decode('utf-8'))
    assert_dshape_equal(pre_datashape, response_dshape)
Example #30
def test_add_data_twice_error(temp_server, serial):
    # add iris
    iris_path = example('iris.csv')
    payload = serial.dumps({'iris': iris_path})
    temp_server.post('/add', headers=mimetype(serial), data=payload)

    # Try to add to existing 'iris'
    resp = temp_server.post('/add', headers=mimetype(serial), data=payload)
    assert resp.status_code == RC.CONFLICT

    # Verify the server still serves the original 'iris'.
    ds = datashape.dshape(temp_server.get('/datashape').data.decode('utf-8'))
    t = symbol('t', ds)
    query = {'expr': to_tree(t.iris)}
    resp = temp_server.post('/compute',
                            data=serial.dumps(query),
                            headers=mimetype(serial))
    assert resp.status_code == RC.OK
Example #31
def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = test.get('/datashape').data.decode('utf-8')
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = test.get('/datashape').data.decode('utf-8')
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = str(discover(data2))
        assert new_datashape == expected2
        assert new_datashape != initial_datashape

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #32
def test_add_data_twice_error(temp_add_server, serial):
    # add iris
    iris_path = example('iris.csv')
    payload = serial.dumps({'iris': iris_path})
    temp_add_server.post('/add',
                         headers=mimetype(serial),
                         data=payload)

    # Try to add to existing 'iris'
    resp = temp_add_server.post('/add',
                                headers=mimetype(serial),
                                data=payload)
    assert resp.status_code == RC.CONFLICT

    # Verify the server still serves the original 'iris'.
    response_ds = temp_add_server.get('/datashape').data.decode('utf-8')
    ds = datashape.dshape(response_ds)
    t = symbol('t', ds)
    query = {'expr': to_tree(t.iris)}
    resp = temp_add_server.post('/compute',
                                data=serial.dumps(query),
                                headers=mimetype(serial))
    assert resp.status_code == RC.OK
Example #33
def test_add_data_to_server(serial):
    with temp_server(data) as test:
        # add data
        initial_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = datashape.dshape(discover(data2))
        assert_dshape_equal(new_datashape, expected2)
        assert new_datashape.measure.fields != initial_datashape.measure.fields

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #34
def test_compute_chunks_on_single_csv():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    expr = s.sepal_length.max()
    assert compute(expr, {s: csv}, comfortable_memory=10, chunksize=50) == 7.9
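
For comparison, the same chunked reduction written against pandas alone: take the per-chunk max of sepal_length, then the max of those. Illustrative only; this is not blaze's actual chunking code:

import pandas as pd

reader = pd.read_csv(example('iris.csv'), chunksize=50)
result = max(chunk['sepal_length'].max() for chunk in reader)
assert result == 7.9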
Example #35
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    assert isinstance(
        pre_compute(s.species.head(), csv, comfortable_memory=10), Iterator)
Example #36
def test_pre_compute_on_large_csv_gives_chunked_reader():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    assert isinstance(pre_compute(s.species, csv, comfortable_memory=10),
                      (chunks(pd.DataFrame), pd.io.parsers.TextFileReader))
Example #37
def test_pre_compute_on_small_csv_gives_dataframe():
    csv = CSV(example('iris.csv'))
    s = symbol('s', discover(csv))
    assert isinstance(pre_compute(s.species, csv), (Series, DataFrame))
Example #38
def test_Data_on_json_is_concrete():
    d = Data(example('accounts-streaming.json'))

    assert compute(d.amount.sum()) == 100 - 200 + 300 + 400 - 500
    assert compute(d.amount.sum()) == 100 - 200 + 300 + 400 - 500
Example #39
def test_data_passes_kwargs_to_resource():
    assert Data(example('iris.csv'), encoding='ascii').data.encoding == 'ascii'
Example #40
from blaze.server.client import mimetype
from blaze.server.server import Server, to_tree, from_tree
from blaze.server.serialization import all_formats


accounts = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])

cities = DataFrame([['Alice', 'NYC'], ['Bob', 'LA']],
                   columns=['name', 'city'])

events = DataFrame([[1, datetime(2000, 1, 1, 12, 0, 0)],
                    [2, datetime(2000, 1, 2, 12, 0, 0)]],
                   columns=['value', 'when'])

db = resource('sqlite:///' + example('iris.db'))

data = {'accounts': accounts,
        'cities': cities,
        'events': events,
        'db': db}


@pytest.fixture(scope='module')
def server():
    s = Server(data, all_formats)
    s.app.testing = True
    return s


Example #41
def iris_server():
    iris = CSV(example('iris.csv'))
    s = Server(iris, all_formats)
    s.app.testing = True
    with s.app.test_client() as c:
        yield c
Example #42
def test_pre_compute_on_small_csv_gives_dataframe():
    csv = CSV(example('iris.csv'))
    s = symbol('s', csv.dshape)
    assert isinstance(pre_compute(s.species, csv), DataFrame)