示例#1
0
def test_convert_logfiles_to_bag():
    """Chunked text files convert to a ``Bag`` holding the same list of data."""
    contents = {'a1.log': 'Hello\nWorld', 'a2.log': 'Hola\nMundo'}
    with filetexts(contents) as fns:
        text_files = [TextFile(fn) for fn in fns]
        logs = chunks(TextFile)(text_files)
        bag = convert(Bag, logs)
        assert isinstance(bag, Bag)
        # The source file name should show up in the bag's task graph values.
        assert 'a1.log' in str(bag.dask.values())
        assert convert(list, bag) == convert(list, logs)
示例#2
0
def test_small_chunk_size():
    """A resource read with ``chunk_size=1`` converts to the same bytes as the default."""
    def read_bytes(path):
        # Read the whole file in binary mode so the comparison is byte-exact.
        with open(path, 'rb') as handle:
            return handle.read()

    normal = convert(Temp(CSV), resource(iris_url))
    small_chunk = convert(Temp(CSV), resource(iris_url, chunk_size=1))
    assert read_bytes(normal.path) == read_bytes(small_chunk.path)
示例#3
0
文件: test_url.py 项目: blaze/odo
def test_small_chunk_size():
    """Check that ``chunk_size=1`` does not alter the bytes of the converted CSV."""
    default_csv = convert(Temp(CSV), resource(iris_url))
    tiny_chunk_csv = convert(Temp(CSV), resource(iris_url, chunk_size=1))

    # Collect the raw bytes of both temporary files, in order.
    payloads = []
    for csv_file in (default_csv, tiny_chunk_csv):
        with open(csv_file.path, 'rb') as handle:
            payloads.append(handle.read())

    assert payloads[0] == payloads[1]
示例#4
0
def test_append_sas_to_sqlite_round_trip():
    """Rows appended from ``sasfile`` to a SQLite table read back as the same set."""
    expected = convert(set, sasfile)

    with tmpfile('db') as fn:
        uri = 'sqlite:///%s::SAS' % fn
        table = resource(uri, dshape=discover(sasfile))
        append(table, sasfile)
        # Materialise the rows while the temporary database file still exists.
        result = convert(set, table)

    assert expected == result
示例#5
0
def test_empty_line():
    """A trailing blank line in a JSON-lines file is not counted as a record."""
    text = '{"a": 1}\n{"a": 2}\n\n'  # extra endline
    with tmpfile('.json') as fn:
        with open(fn, 'w') as out:
            out.write(text)
        records = convert(list, JSONLines(fn))
        assert len(records) == 2
示例#6
0
def test_empty_line():
    """Two JSON records followed by an empty line convert to exactly two items."""
    payload = '{"a": 1}\n{"a": 2}\n\n'  # extra endline
    with tmpfile('.json') as fn:
        with open(fn, 'w') as sink:
            sink.write(payload)
        lines = JSONLines(fn)
        parsed = convert(list, lines)
        assert len(parsed) == 2
示例#7
0
def test_multiple_object_ids():
    """The ``other`` ObjectId field is absent from the discovered dshape and the rows."""
    first = {'x': 1, 'y': 2, 'other': ObjectId('1' * 24)}
    second = {'x': 3, 'y': 4, 'other': ObjectId('2' * 24)}
    with coll([first, second]) as c:
        assert discover(c) == dshape('2 * {x: int64, y: int64}')

        rows = convert(list, c)
        assert rows == [(1, 2), (3, 4)]
示例#8
0
def test_select_to_iterator():
    """SQLAlchemy selects convert to lists and scalars as directed by ``dshape``."""
    _engine, t = single_table_engine()
    append(t, [('Alice', 100), ('Bob', 200)])

    # Without a dshape each row is a 1-tuple; a scalar measure flattens it.
    plus_one = sa.select([t.c.amount + 1])
    assert convert(list, plus_one) == [(101,), (201,)]
    assert convert(list, plus_one, dshape=dshape('var * int')) == [101, 201]

    # An aggregate select converts to a plain int.
    total = sa.select([sa.sql.func.sum(t.c.amount)])
    assert convert(int, total, dshape=dshape('int')) == 300

    # Selecting the whole table must yield rows whose exact type is tuple.
    everything = sa.select([t])
    rows = convert(list, everything, dshape=discover(t))
    assert type(rows[0]) is tuple
示例#9
0
def test_read_gzip():
    """A gzip-compressed JSON file is read back as the original ``dat`` records."""
    with tmpfile('json.gz') as fn:
        # The context manager closes the gzip handle even if the write raises,
        # so the temporary file is never left open.
        with gzip.open(fn, 'wb') as f:
            f.write(json.dumps(dat).encode('utf-8'))
        js = JSON(fn)
        assert convert(list, js) == dat
示例#10
0
def test_fixed_convert():
    """A DataFrame written with ``to_hdf`` is recovered through an hdfstore URI."""
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        uri = 'hdfstore://' + fn + '::/foo'
        node = resource(uri)
        try:
            frame = convert(pd.DataFrame, node)
            assert eq(frame, df)
        finally:
            # Close the parent store whether or not the comparison passed.
            node.parent.close()
示例#11
0
def test_convert_sas_to_dataframe():
    """``sasfile`` converts to a DataFrame whose measure matches that of ``ds``."""
    frame = convert(pd.DataFrame, sasfile)
    assert isinstance(frame, pd.DataFrame)

    # pandas doesn't support date
    expected = str(ds.measure).replace('date', 'datetime')
    # Option markers ('?') are stripped from the discovered measure before comparing.
    actual = str(discover(frame).measure).replace('?', '')

    assert actual == expected
示例#12
0
def test_read_gzip():
    """Reading a gzipped JSON dump of ``dat`` returns ``dat`` again."""
    with tmpfile('json.gz') as fn:
        encoded = json.dumps(dat).encode('utf-8')
        # ``with`` guarantees the gzip handle is closed even when writing fails.
        with gzip.open(fn, 'wb') as f:
            f.write(encoded)
        js = JSON(fn)
        assert convert(list, js) == dat
示例#13
0
def test_read_gzip():
    """A gzip-compressed BSON file is read back as the original ``dat`` records."""
    with tmpfile('.bson.gz') as fn:
        # The context manager closes the gzip handle even if encoding or
        # writing one of the items raises.
        with gzip.open(fn, 'wb') as f:
            for item in dat:
                f.write(bson.BSON.encode(item))
        b = BSON(fn)
        assert convert(list, b) == dat
示例#14
0
文件: test_sql.py 项目: kwin-wang/odo
def test_select_to_iterator():
    """Selectables convert to row tuples, flat values or a scalar depending on dshape."""
    _engine, table = single_table_engine()
    append(table, [('Alice', 100), ('Bob', 200)])

    incremented = sa.select([table.c.amount + 1])
    # Default conversion keeps each row as a 1-tuple.
    assert convert(list, incremented) == [(101, ), (201, )]
    # A scalar measure yields bare values instead.
    flat = convert(list, incremented, dshape=dshape('var * int'))
    assert flat == [101, 201]

    summed = sa.select([sa.sql.func.sum(table.c.amount)])
    assert convert(int, summed, dshape=dshape('int')) == 300

    whole_table = sa.select([table])
    result = convert(list, whole_table, dshape=discover(table))
    # The exact type matters here, hence ``type(...) is`` rather than isinstance.
    assert type(result[0]) is tuple
示例#15
0
def test_fixed_convert():
    """Round-trip ``df`` through an HDF5 file addressed by an hdfstore URI."""
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        stored = resource('hdfstore://' + fn + '::/foo')
        try:
            loaded = convert(pd.DataFrame, stored)
            assert eq(loaded, df)
        finally:
            # Always release the underlying store, even on assertion failure.
            stored.parent.close()
示例#16
0
def test_into_table_iterator():
    """Tuples and dicts appended to SQL tables read back as the same list of tuples."""
    engine = sa.create_engine("sqlite:///:memory:")
    metadata = sa.MetaData(engine)

    # First table: fed with tuples.
    points = dshape_to_table("points", "{x: int, y: int}", metadata=metadata)
    points.create()

    data = [(1, 1), (2, 4), (3, 9)]
    append(points, data)

    assert convert(list, points) == data
    first_row = convert(list, points)[0]
    assert isinstance(first_row, tuple)

    # Second table: fed with dicts describing the same rows.
    points2 = dshape_to_table("points2", "{x: int, y: int}", metadata=metadata)
    points2.create()
    records = [{"x": 1, "y": 1}, {"x": 2, "y": 4}, {"x": 3, "y": 9}]
    append(points2, records)

    assert convert(list, points2) == data
示例#17
0
def test_into_table_iterator():
    """Appending tuples or dicts to a table and converting to list gives tuples back."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    tuple_table = dshape_to_table('points', '{x: int, y: int}',
                                  metadata=metadata)
    tuple_table.create()

    data = [(1, 1), (2, 4), (3, 9)]
    append(tuple_table, data)

    assert convert(list, tuple_table) == data
    assert isinstance(convert(list, tuple_table)[0], tuple)

    # Same rows expressed as dicts must produce the same result.
    dict_table = dshape_to_table('points2', '{x: int, y: int}',
                                 metadata=metadata)
    dict_table.create()
    as_dicts = [{'x': 1, 'y': 1}, {'x': 2, 'y': 4}, {'x': 3, 'y': 9}]
    append(dict_table, as_dicts)

    assert convert(list, dict_table) == data
示例#18
0
def test_insert_to_ooc():
    """Running the ``insert_to_ooc`` graph fills ``y`` with the contents of ``x``."""
    x = np.arange(600).reshape((20, 30))
    y = np.empty(shape=x.shape, dtype=x.dtype)
    a = convert(Array, x, blockshape=(4, 5))

    dsk = insert_to_ooc(y, a)
    # Execute the insertion tasks together with the tasks that define ``a``.
    graph = merge(dsk, a.dask)
    keys = list(dsk.keys())
    core.get(graph, keys)

    assert eq(y, x)
示例#19
0
def test_insert_to_ooc():
    """Executing the ``insert_to_ooc`` tasks copies the chunked array into ``y``."""
    x = np.arange(600).reshape((20, 30))
    y = np.empty(shape=x.shape, dtype=x.dtype)
    chunked = convert(Array, x, chunks=(4, 5))

    store_tasks = insert_to_ooc(y, chunked)
    # The store tasks depend on the array's own graph, so both are merged.
    full_graph = merge(store_tasks, chunked.dask)
    core.get(full_graph, list(store_tasks.keys()))

    assert eq(y, x)
示例#20
0
def test_append_and_convert_round_trip():
    """Rows appended to a SQLAlchemy table convert back to the same list."""
    engine = sa.create_engine("sqlite:///:memory:")
    metadata = sa.MetaData(engine)
    name_col = sa.Column("name", sa.String, primary_key=True)
    balance_col = sa.Column("balance", sa.Integer)
    bank = sa.Table("bank", metadata, name_col, balance_col)
    bank.create()

    data = [("Alice", 1), ("Bob", 2)]
    append(bank, data)

    assert convert(list, bank) == data
示例#21
0
def test_read_gzip_lines():
    """A gzip-compressed JSON-lines file is read back as the original ``dat``."""
    with tmpfile('json.gz') as fn:
        # ``with`` guarantees the gzip handle is closed even if a write raises.
        with gzip.open(fn, 'wb') as f:
            for item in dat:
                # One JSON document per line, newline-terminated.
                f.write(json.dumps(item).encode('utf-8'))
                f.write(b'\n')
        js = JSONLines(fn)
        assert convert(list, js) == dat
示例#22
0
def test_read_gzip_lines():
    """Each item of ``dat`` written as a gzipped JSON line is read back intact."""
    with tmpfile('json.gz') as fn:
        # Use a context manager so the handle is released on any failure path.
        with gzip.open(fn, 'wb') as f:
            for item in dat:
                s = json.dumps(item).encode('utf-8')
                f.write(s)
                f.write(b'\n')
        js = JSONLines(fn)
        assert convert(list, js) == dat
示例#23
0
def test_append_and_convert_round_trip():
    """Append two rows to an in-memory table and read the same rows back."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    columns = [
        sa.Column('name', sa.String, primary_key=True),
        sa.Column('balance', sa.Integer),
    ]
    table = sa.Table('bank', metadata, *columns)
    table.create()

    data = [('Alice', 1), ('Bob', 2)]
    append(table, data)

    assert convert(list, table) == data
示例#24
0
def test_url_to_hdfs():
    """Data sent from ``iris_url`` to an HDFS CSV has the same dshape as a local copy."""
    from .test_hdfs import tmpfile_hdfs, hdfs, HDFS

    with tmpfile_hdfs() as target:

        # Reference: the URL resource converted to a temporary CSV.
        url_resource = resource(iris_url)
        reference_csv = convert(Temp(CSV), url_resource)

        # Subject under test: the URL pushed straight to the HDFS target.
        hdfs_csv = HDFS(CSV)(target, hdfs=hdfs)
        odo(iris_url, hdfs_csv)

        assert discover(hdfs_csv) == discover(reference_csv)
示例#25
0
def test_multiple_object_ids():
    """Records with an extra ObjectId field discover and convert without that field."""
    data = [
        {'x': 1, 'y': 2, 'other': ObjectId('1' * 24)},
        {'x': 3, 'y': 4, 'other': ObjectId('2' * 24)},
    ]
    with coll(data) as collection:
        # Only x and y are expected in the discovered shape.
        assert discover(collection) == dshape('2 * {x: int64, y: int64}')

        assert convert(list, collection) == [(1, 2), (3, 4)]
示例#26
0
def test_multiple_jsonlines():
    """A glob resource over two JSON files yields the records of both files."""
    a, b = '_test_a1.json', '_test_a2.json'
    try:
        # Start from a clean slate; missing files are not an error.
        with ignoring(OSError):
            os.remove(a)
        with ignoring(OSError):
            os.remove(b)
        with open(a, 'w') as f:
            json.dump(dat, f)
        # Use the ``b`` variable (not a duplicated bytes literal) so the file
        # that is created is exactly the one that is removed during cleanup.
        with open(b, 'w') as f:
            json.dump(dat, f)
        r = resource('_test_a*.json')
        result = convert(list, r)
        assert len(result) == len(dat) * 2
    finally:
        # Best-effort removal of both files regardless of the test outcome.
        with ignoring(OSError):
            os.remove(a)
        with ignoring(OSError):
            os.remove(b)
示例#27
0
def test_multiple_jsonlines():
    """Two JSON files matched by one glob convert to twice ``len(dat)`` items."""
    a, b = '_test_a1.json', '_test_a2.json'
    try:
        # Remove leftovers from earlier runs; absence of a file is ignored.
        for name in (a, b):
            with ignoring(OSError):
                os.remove(name)
        # Both files are written through the same names that cleanup removes,
        # instead of repeating the second path as a bytes literal.
        for name in (a, b):
            with open(name, 'w') as f:
                json.dump(dat, f)
        r = resource('_test_a*.json')
        result = convert(list, r)
        assert len(result) == len(dat) * 2
    finally:
        # Best-effort cleanup regardless of the test outcome.
        for name in (a, b):
            with ignoring(OSError):
                os.remove(name)
示例#28
0
def test_convert():
    """A NumPy array converted with ``chunks`` is an ``Array`` instance."""
    source = np.arange(600).reshape((20, 30))
    chunked = convert(Array, source, chunks=(4, 5))
    assert isinstance(chunked, Array)
示例#29
0
def test_convert_local_file_to_temp_ssh_file():
    """A local CSV converted to ``Temp(SSH(CSV))`` holds the same rows."""
    text = 'name,balance\nAlice,100\nBob,200'
    with filetext(text, extension='csv') as fn:
        local_csv = CSV(fn)
        remote_csv = convert(Temp(SSH(CSV)), local_csv, hostname='localhost')

        assert into(list, local_csv) == into(list, remote_csv)
示例#30
0
def test_chunks():
    """A dataset converted to DataFrame chunks matches ``df`` as an ndarray."""
    with file(df) as (fn, f, dset):
        frame_chunks = convert(chunks(pd.DataFrame), dset)
        as_array = convert(np.ndarray, frame_chunks)
        assert eq(as_array, df)
示例#31
0
文件: test_ssh.py 项目: Curezhang/odo
def test_convert_local_file_to_temp_ssh_file():
    """Converting a CSV to a temporary SSH CSV on localhost preserves its rows."""
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        source = CSV(fn)
        over_ssh = convert(Temp(SSH(CSV)), source, hostname='localhost')

        local_rows = into(list, source)
        ssh_rows = into(list, over_ssh)
        assert local_rows == ssh_rows
示例#32
0
def test_convert_to_numpy_array():
    """A chunked ``Array`` converts back to an ndarray equal to its source."""
    original = np.arange(600).reshape((20, 30))
    chunked = convert(Array, original, chunks=(4, 5))
    restored = convert(np.ndarray, chunked)
    assert eq(original, restored)
示例#33
0
def test_write_gzip():
    """Records appended to a ``.bson.gz`` target read back unchanged."""
    with tmpfile('.bson.gz') as fn:
        target = BSON(fn)
        append(target, dat)

        stored = convert(list, target)
        assert stored == dat
示例#34
0
def test_convert_to_temp_bson():
    """Converting ``dat`` to ``Temp(BSON)`` gives a ``BSON`` and ``_Temp`` instance."""
    temp_bson = convert(Temp(BSON), dat)
    for expected_type in (BSON, _Temp):
        assert isinstance(temp_bson, expected_type)

    # The temporary object must still hold the original records.
    assert convert(list, temp_bson) == dat
示例#35
0
def test_chunks():
    """Chunked DataFrame conversion of a dataset round-trips to an equal ndarray."""
    with file(df) as (fn, f, dset):
        pieces = convert(chunks(pd.DataFrame), dset)
        assert eq(convert(np.ndarray, pieces), df)
示例#36
0
文件: chunks.py 项目: mhlr/blaze
def pre_compute(expr, data, **kwargs):
    """Return ``data`` as an ``Iterator`` when only ``Cheap`` nodes are involved.

    Every item yielded by ``path(expr, leaf)``, where ``leaf`` is the first
    entry of ``expr._leaves()``, must be an instance of ``Cheap``; otherwise
    ``MDNotImplementedError`` is raised. Extra keyword arguments are accepted
    but not used.
    """
    first_leaf = expr._leaves()[0]
    for node in path(expr, first_leaf):
        # Bail out at the first non-cheap node.
        if not isinstance(node, Cheap):
            raise MDNotImplementedError()
    return convert(Iterator, data)
示例#37
0
def test_convert():
    """A pandas DataFrame converts to a ``dd.DataFrame`` instance."""
    values = np.arange(50).reshape(10, 5)
    frame = pd.DataFrame(values, columns=list('abcde'))
    partitioned = convert(dd.DataFrame, frame, npartitions=2)
    assert isinstance(partitioned, dd.DataFrame)
def test_convert_to_pandas_series():
    """Column ``a`` of a ``dd.DataFrame`` converts to an equal pandas Series."""
    values = np.arange(50).reshape(10, 5)
    frame = pd.DataFrame(values, columns=list('abcde'))
    partitioned = convert(dd.DataFrame, frame, npartitions=2)
    series = convert(pd.Series, partitioned.a)
    tm.assert_series_equal(series, frame.a)
def test_convert_to_pandas_dataframe():
    """A ``dd.DataFrame`` converts back to a pandas DataFrame equal to its source."""
    values = np.arange(50).reshape(10, 5)
    frame = pd.DataFrame(values, columns=list('abcde'))
    partitioned = convert(dd.DataFrame, frame, npartitions=2)
    restored = convert(pd.DataFrame, partitioned)
    tm.assert_frame_equal(restored, frame)
def test_convert():
    """Converting a 10x5 pandas frame with two partitions yields a ``dd.DataFrame``."""
    source = pd.DataFrame(
        np.arange(50).reshape(10, 5),
        columns=list('abcde'),
    )
    result = convert(dd.DataFrame, source, npartitions=2)
    assert isinstance(result, dd.DataFrame)
示例#41
0
def test_convert_pandas():
    """The dataset opened from ``df`` converts to a DataFrame equal to ``df``."""
    with file(df) as (fn, f, dset):
        frame = convert(pd.DataFrame, dset)
        assert eq(frame, df)
示例#42
0
def test_extend_empty():
    """Appending an empty list to an empty table leaves it empty."""
    _engine, table = single_table_engine()

    before = convert(list, table)
    assert not before
    append(table, [])
    after = convert(list, table)
    assert not after
示例#43
0
def test_convert_chunks():
    """Converting with a half-length chunk size yields two chunks equal to ``df``."""
    with file(df) as (fn, f, dset):
        # Floor division keeps ``chunksize`` an int on Python 3; ``/`` would
        # pass a float where a row count is expected.
        c = convert(chunks(pd.DataFrame), dset, chunksize=len(df) // 2)
        assert len(list(c)) == 2
        assert eq(convert(pd.DataFrame, c), df)
示例#44
0
def test_sql_field_names_disagree_on_order():
    """Appended rows are matched to table columns by field name, not position."""
    table_shape = dshape('{x: int, y: int}')
    r = resource('sqlite:///:memory:::tb', dshape=table_shape)
    # The incoming rows list ``y`` first, the reverse of the table's order.
    append(r, [(1, 2), (10, 20)], dshape=dshape('{y: int, x: int}'))
    assert convert(set, r) == {(2, 1), (20, 10)}
示例#45
0
def test_convert_to_pandas_dataframe():
    """Round-trip a pandas DataFrame through ``dd.DataFrame`` and compare frames."""
    column_names = list('abcde')
    original = pd.DataFrame(np.arange(50).reshape(10, 5), columns=column_names)
    distributed = convert(dd.DataFrame, original, npartitions=2)
    round_tripped = convert(pd.DataFrame, distributed)
    tm.assert_frame_equal(round_tripped, original)
示例#46
0
def test_convert_bson_list():
    """A BSON file written from ``dat`` converts to a list equal to ``dat``."""
    with bson_file(dat) as fn:
        records = convert(list, BSON(fn))
        assert records == dat
示例#47
0
def test_convert():
    """The ``iris_url`` resource converts to a temporary CSV with the same dshape."""
    source = resource(iris_url)
    temp_csv = convert(Temp(CSV), source)
    assert discover(source) == discover(temp_csv)

    # The result must be flagged as a temporary object.
    assert isinstance(temp_csv, _Temp)
示例#48
0
def test_convert_to_pandas_series():
    """A single ``dd.DataFrame`` column round-trips to the matching pandas Series."""
    column_names = list('abcde')
    original = pd.DataFrame(np.arange(50).reshape(10, 5), columns=column_names)
    distributed = convert(dd.DataFrame, original, npartitions=2)
    column_a = convert(pd.Series, distributed.a)
    tm.assert_series_equal(column_a, original.a)
示例#49
0
def test_convert_pandas():
    """Converting the opened dataset to a pandas DataFrame reproduces ``df``."""
    with file(df) as (fn, f, dset):
        loaded = convert(pd.DataFrame, dset)
        assert eq(loaded, df)
示例#50
0
def test_convert_chunks():
    """A chunk size of half the frame length splits the dataset into two chunks."""
    with file(df) as (fn, f, dset):
        # ``//`` yields an int under Python 3; true division would hand a
        # float to ``chunksize``.
        half = len(df) // 2
        c = convert(chunks(pd.DataFrame), dset, chunksize=half)
        assert len(list(c)) == 2
        assert eq(convert(pd.DataFrame, c), df)
示例#51
0
def test_append_bson():
    """Records appended to a fresh ``.bson`` file convert back to ``dat``."""
    with tmpfile('.bson') as fn:
        target = BSON(fn)
        append(target, dat)
        stored = convert(list, target)
        assert stored == dat
示例#52
0
def test_array_interface():
    """``np.array`` applied to a chunked ``Array`` reproduces the source ndarray."""
    source = np.arange(600).reshape((20, 30))
    chunked = convert(Array, source, chunks=(4, 5))

    materialised = np.array(chunked)
    assert eq(source, materialised)