Пример #1
0
def test_map_method():
    """Compare ``Bag.map`` with the builtin ``map`` for several call shapes.

    Covers extra positional arguments (another bag, a constant, the lazy
    result of ``sum()``), keyword arguments, and the multi-argument /
    vararg function forms kept for the deprecated behavior.
    """
    bag = db.from_sequence(range(100), npartitions=10)
    other = db.from_sequence(range(100, 200), npartitions=10)
    items = bag.compute()
    other_items = other.compute()

    def myadd(a, b=2, c=3):
        return a + b + c

    # Function alone, then with a second bag supplied element-wise.
    result = bag.map(myadd).compute()
    assert result == list(map(myadd, items))
    result = bag.map(myadd, other).compute()
    assert result == list(map(myadd, items, other_items))

    # A plain constant, passed positionally and by keyword.
    result = bag.map(myadd, 10).compute()
    assert result == [myadd(item, 10) for item in items]
    result = bag.map(myadd, b=10).compute()
    assert result == [myadd(item, b=10) for item in items]

    # A bag argument combined with a keyword constant.
    result = bag.map(myadd, other, c=10).compute()
    expected = [myadd(i, j, 10) for (i, j) in zip(items, other_items)]
    assert result == expected

    # The lazy reduction ``bag.sum()`` used as the second argument.
    total = sum(items)
    result = bag.map(myadd, bag.sum(), c=10).compute()
    assert result == [myadd(item, total, 10) for item in items]

    # check that map works with multiarg functions. Can be removed after
    # deprecated behavior is removed
    result = bag.map(add, other).compute()
    assert result == list(map(add, items, other_items))

    # check that map works with vararg functions. Can be removed after
    # deprecated behavior is removed
    def vararg_inc(*args):
        return inc(*args)

    result = bag.map(vararg_inc).compute(get=dask.get)
    assert result == list(map(inc, items))
Пример #2
0
def test_map_method():
    """Verify ``Bag.map`` results against equivalent eager computations.

    Each case maps ``myadd`` (or ``add`` / ``inc``) over a 100-element bag
    and compares the computed list with a locally built expected list.
    """
    left = db.from_sequence(range(100), npartitions=10)
    right = db.from_sequence(range(100, 200), npartitions=10)
    left_values = left.compute()
    right_values = right.compute()

    def myadd(a, b=2, c=3):
        return a + b + c

    pairs = list(zip(left_values, right_values))

    assert left.map(myadd).compute() == [myadd(i) for i in left_values]
    assert left.map(myadd, right).compute() == [myadd(i, j) for (i, j) in pairs]
    assert left.map(myadd, 10).compute() == [myadd(i, 10) for i in left_values]
    assert left.map(myadd, b=10).compute() == [
        myadd(i, b=10) for i in left_values
    ]
    assert left.map(myadd, right, c=10).compute() == [
        myadd(i, j, 10) for (i, j) in pairs
    ]

    # ``left.sum()`` is passed lazily; the expected side uses the eager sum.
    eager_sum = sum(left_values)
    with_sum = left.map(myadd, left.sum(), c=10)
    assert with_sum.compute() == [myadd(i, eager_sum, 10) for i in left_values]

    # check that map works with multiarg functions. Can be removed after
    # deprecated behavior is removed
    assert left.map(add, right).compute() == [add(i, j) for (i, j) in pairs]

    # check that map works with vararg functions. Can be removed after
    # deprecated behavior is removed
    def vararg_inc(*args):
        return inc(*args)

    incremented = left.map(vararg_inc).compute(get=dask.get)
    assert incremented == [inc(i) for i in left_values]
Пример #3
0
def test_from_filenames_large():
    """Read a 100-line file with ``chunkbytes=100`` and with the default.

    The chunked bag must have a graph with more than five entries and yield
    the same items as the default read and as a read from a one-element list
    of filenames.
    """
    payload = ('Hello, world!' + os.linesep).encode() * 100
    with tmpfile() as fn:
        with open(fn, 'wb') as handle:
            handle.write(payload)

        chunked = db.from_filenames(fn, chunkbytes=100)
        whole = db.from_filenames(fn)
        assert len(chunked.dask) > 5
        assert [str(item) for item in chunked] == [str(item) for item in whole]

        # Passing the filename inside a list should give the same items.
        from_list = db.from_filenames([fn], chunkbytes=100)
        assert list(chunked) == list(from_list)
Пример #4
0
def test_read_text_large():
    """Read a 100-line file with ``blocksize=100`` and with the default.

    The blocked bag must have a graph with more than five entries; after
    stripping, both reads must yield the same strings. A read from a
    one-element list of filenames must match the blocked read item for item.
    """
    payload = ('Hello, world!' + os.linesep).encode() * 100
    with tmpfile() as fn:
        with open(fn, 'wb') as handle:
            handle.write(payload)

        blocked = db.read_text(fn, blocksize=100)
        whole = db.read_text(fn)
        assert len(blocked.dask) > 5

        blocked_lines = [str(item) for item in blocked.str.strip()]
        whole_lines = [str(item) for item in whole.str.strip()]
        assert blocked_lines == whole_lines

        # Passing the filename inside a list should give the same items.
        from_list = db.read_text([fn], blocksize=100)
        assert list(blocked) == list(from_list)
Пример #5
0
def test_from_filenames_large():
    """Check ``db.from_filenames`` chunking on a file of 100 repeated lines.

    Compares a ``chunkbytes=100`` read with a default read and with a read
    given ``[fn]`` instead of ``fn``.
    """
    one_line = ('Hello, world!' + os.linesep).encode()
    with tmpfile() as fn:
        with open(fn, 'wb') as out:
            out.write(one_line * 100)

        small_chunks = db.from_filenames(fn, chunkbytes=100)
        default_read = db.from_filenames(fn)

        # Chunking should produce a graph with more than five entries.
        assert len(small_chunks.dask) > 5

        chunked_items = list(map(str, small_chunks))
        default_items = list(map(str, default_read))
        assert chunked_items == default_items

        listed = db.from_filenames([fn], chunkbytes=100)
        assert list(small_chunks) == list(listed)
Пример #6
0
def test_read_text_encoding():
    """Read a gb18030-encoded file with ``db.read_text`` and compare reads.

    The file holds 100 copies of one line. The ``blocksize=100`` read must
    have a graph with more than five entries and produce the same items as
    the default read and as a read from a one-element list of filenames.
    """
    payload = (u'你好!' + os.linesep).encode('gb18030') * 100
    with tmpfile() as fn:
        with open(fn, 'wb') as handle:
            handle.write(payload)

        blocked = db.read_text(fn, blocksize=100, encoding='gb18030')
        whole = db.read_text(fn, encoding='gb18030')
        assert len(blocked.dask) > 5

        # Re-encode each item as UTF-8 before comparing the two reads.
        blocked_utf8 = [item.encode('utf-8') for item in blocked]
        whole_utf8 = [item.encode('utf-8') for item in whole]
        assert blocked_utf8 == whole_utf8

        from_list = db.read_text([fn], blocksize=100, encoding='gb18030')
        assert list(blocked) == list(from_list)
Пример #7
0
def test_read_text_encoding():
    """Check ``db.read_text`` with ``encoding='gb18030'`` on a 100-line file.

    Compares a ``blocksize=100`` read with a default read and with a read
    given ``[fn]`` instead of ``fn``.
    """
    codec = 'gb18030'
    one_line = (u'你好!' + os.linesep).encode(codec)

    def as_utf8(bag):
        # Materialize the bag, re-encoding every item as UTF-8 bytes.
        return [text.encode('utf-8') for text in bag]

    with tmpfile() as fn:
        with open(fn, 'wb') as out:
            out.write(one_line * 100)

        small_blocks = db.read_text(fn, blocksize=100, encoding=codec)
        default_read = db.read_text(fn, encoding=codec)

        # Blocking should produce a graph with more than five entries.
        assert len(small_blocks.dask) > 5
        assert as_utf8(small_blocks) == as_utf8(default_read)

        listed = db.read_text([fn], blocksize=100, encoding=codec)
        assert list(small_blocks) == list(listed)
Пример #8
0
def test_from_filenames_encoding():
    """Read a gb18030-encoded file with ``db.from_filenames``.

    The file holds 100 copies of one line. The ``chunkbytes=100`` read must
    have a graph with more than five entries and produce the same items as
    the default read and as a read from a one-element list of filenames.
    """
    payload = (u'你好!' + os.linesep).encode('gb18030') * 100
    with tmpfile() as fn:
        with open(fn, 'wb') as handle:
            handle.write(payload)

        chunked = db.from_filenames(fn, chunkbytes=100, encoding='gb18030')
        whole = db.from_filenames(fn, encoding='gb18030')
        assert len(chunked.dask) > 5

        # Re-encode each item as UTF-8 before comparing the two reads.
        chunked_utf8 = [item.encode('utf-8') for item in chunked]
        whole_utf8 = [item.encode('utf-8') for item in whole]
        assert chunked_utf8 == whole_utf8

        from_list = db.from_filenames([fn], chunkbytes=100,
                                      encoding='gb18030')
        assert list(chunked) == list(from_list)
Пример #9
0
def test_read_text_large():
    """Check ``db.read_text`` blocking on a file of 100 repeated lines.

    Compares a ``blocksize=100`` read with a default read and with a read
    given ``[fn]`` instead of ``fn``.
    """
    one_line = ('Hello, world!' + os.linesep).encode()
    with tmpfile() as fn:
        with open(fn, 'wb') as out:
            out.write(one_line * 100)

        small_blocks = db.read_text(fn, blocksize=100)
        default_read = db.read_text(fn)

        # Blocking should produce a graph with more than five entries.
        assert len(small_blocks.dask) > 5

        blocked_items = [str(item) for item in small_blocks]
        default_items = [str(item) for item in default_read]
        assert blocked_items == default_items

        listed = db.read_text([fn], blocksize=100)
        assert list(small_blocks) == list(listed)
Пример #10
0
def test_bag_map():
    """Exercise the module-level ``db.map`` with mixed argument kinds.

    Bags, plain constants, the lazy result of ``b.sum()`` and a
    ``dask.delayed`` value are passed positionally and by keyword, and each
    computed result is compared with an eagerly built list. The final
    sections check that ``ValueError`` is raised for mismatched
    ``npartitions``, for a call with no bag argument, and (on compute) for
    bags whose partitions have unequal lengths.
    """
    b = db.from_sequence(range(100), npartitions=10)
    b2 = db.from_sequence(range(100, 200), npartitions=10)
    x = b.compute()
    x2 = b2.compute()

    def myadd(a=1, b=2, c=3):
        return a + b + c

    # A bag may be given positionally or as a keyword argument.
    assert db.map(myadd, b).compute() == list(map(myadd, x))
    assert db.map(myadd, a=b).compute() == list(map(myadd, x))
    assert db.map(myadd, b, b2).compute() == list(map(myadd, x, x2))
    assert db.map(myadd, b, 10).compute() == [myadd(i, 10) for i in x]
    assert db.map(myadd, 10, b=b).compute() == [myadd(10, b=i) for i in x]

    sol = [myadd(i, b=j, c=100) for (i, j) in zip(x, x2)]
    assert db.map(myadd, b, b=b2, c=100).compute() == sol

    # Only ``b`` takes part in this call, so the expected values need ``x``
    # alone (no pairing with ``x2``).
    sol = [myadd(i, c=100) for i in x]
    assert db.map(myadd, b, c=100).compute() == sol

    # ``b.sum()`` is passed lazily; the expected side uses the eager sum.
    x_sum = sum(x)
    sol = [myadd(x_sum, b=i, c=100) for i in x2]
    assert db.map(myadd, b.sum(), b=b2, c=100).compute() == sol

    sol = [myadd(i, b=x_sum, c=100) for i in x2]
    assert db.map(myadd, b2, b.sum(), c=100).compute() == sol

    sol = [myadd(a=100, b=x_sum, c=i) for i in x2]
    assert db.map(myadd, a=100, b=b.sum(), c=b2).compute() == sol

    # A delayed constant behaves like the plain value it wraps.
    a = dask.delayed(10)
    assert db.map(myadd, b, a).compute() == [myadd(i, 10) for i in x]
    assert db.map(myadd, b, b=a).compute() == [myadd(i, b=10) for i in x]

    # Mismatched npartitions
    fewer_parts = db.from_sequence(range(100), npartitions=5)
    with pytest.raises(ValueError):
        db.map(myadd, b, fewer_parts)

    # No bags
    with pytest.raises(ValueError):
        db.map(myadd, b.sum(), 1, 2)

    # Unequal partitioning
    unequal = db.from_sequence(range(110), npartitions=10)
    with pytest.raises(ValueError):
        db.map(myadd, b, unequal, c=b2).compute()
    with pytest.raises(ValueError):
        db.map(myadd, b, b=unequal, c=b2).compute()
Пример #11
0
def test_bag_map():
    """Exercise the module-level ``db.map`` with mixed argument kinds.

    Bags, plain constants, the lazy result of ``b.sum()`` and a
    ``dask.delayed`` value are passed positionally and by keyword, and each
    computed result is compared with an eagerly built list. The final
    sections check that ``ValueError`` is raised for mismatched
    ``npartitions``, for a call with no bag argument, and (on compute) for
    bags whose partitions have unequal lengths.
    """
    b = db.from_sequence(range(100), npartitions=10)
    b2 = db.from_sequence(range(100, 200), npartitions=10)
    x = b.compute()
    x2 = b2.compute()

    def myadd(a=1, b=2, c=3):
        return a + b + c

    # A bag may be given positionally or as a keyword argument.
    assert db.map(myadd, b).compute() == list(map(myadd, x))
    assert db.map(myadd, a=b).compute() == list(map(myadd, x))
    assert db.map(myadd, b, b2).compute() == list(map(myadd, x, x2))
    assert db.map(myadd, b, 10).compute() == [myadd(i, 10) for i in x]
    assert db.map(myadd, 10, b=b).compute() == [myadd(10, b=i) for i in x]

    sol = [myadd(i, b=j, c=100) for (i, j) in zip(x, x2)]
    assert db.map(myadd, b, b=b2, c=100).compute() == sol

    # Only ``b`` takes part in this call, so the expected values need ``x``
    # alone (no pairing with ``x2``).
    sol = [myadd(i, c=100) for i in x]
    assert db.map(myadd, b, c=100).compute() == sol

    # ``b.sum()`` is passed lazily; the expected side uses the eager sum.
    x_sum = sum(x)
    sol = [myadd(x_sum, b=i, c=100) for i in x2]
    assert db.map(myadd, b.sum(), b=b2, c=100).compute() == sol

    sol = [myadd(i, b=x_sum, c=100) for i in x2]
    assert db.map(myadd, b2, b.sum(), c=100).compute() == sol

    sol = [myadd(a=100, b=x_sum, c=i) for i in x2]
    assert db.map(myadd, a=100, b=b.sum(), c=b2).compute() == sol

    # A delayed constant behaves like the plain value it wraps.
    a = dask.delayed(10)
    assert db.map(myadd, b, a).compute() == [myadd(i, 10) for i in x]
    assert db.map(myadd, b, b=a).compute() == [myadd(i, b=10) for i in x]

    # Mismatched npartitions
    fewer_parts = db.from_sequence(range(100), npartitions=5)
    with pytest.raises(ValueError):
        db.map(myadd, b, fewer_parts)

    # No bags
    with pytest.raises(ValueError):
        db.map(myadd, b.sum(), 1, 2)

    # Unequal partitioning
    unequal = db.from_sequence(range(110), npartitions=10)
    with pytest.raises(ValueError):
        db.map(myadd, b, unequal, c=b2).compute()
    with pytest.raises(ValueError):
        db.map(myadd, b, b=unequal, c=b2).compute()
Пример #12
0
def test_map_keynames():
    """Check the graph keys produced by ``Bag.map``.

    At least one key must split (via ``dask.utils.key_split``) to ``"inc"``,
    and the key set must differ from the one ``map_partitions`` produces for
    the same function.
    """
    bag = db.from_sequence([1, 2, 3])
    graph = dict(bag.map(inc).__dask_graph__())
    prefixes = [dask.utils.key_split(key) for key in graph]
    assert "inc" in prefixes

    map_keys = set(bag.map(inc).__dask_graph__())
    partition_keys = set(bag.map_partitions(inc).__dask_graph__())
    assert map_keys != partition_keys
Пример #13
0
def test_map_method():
    """Compare ``Bag.map`` with the builtin ``map`` for several call shapes.

    Covers extra positional arguments (another bag, a constant, the lazy
    result of ``sum()``) and keyword arguments.
    """
    bag = db.from_sequence(range(100), npartitions=10)
    other = db.from_sequence(range(100, 200), npartitions=10)
    items = bag.compute()
    other_items = other.compute()

    def myadd(a, b=2, c=3):
        return a + b + c

    # Function alone, then with a second bag supplied element-wise.
    result = bag.map(myadd).compute()
    assert result == list(map(myadd, items))
    result = bag.map(myadd, other).compute()
    assert result == list(map(myadd, items, other_items))

    # A plain constant, passed positionally and by keyword.
    result = bag.map(myadd, 10).compute()
    assert result == [myadd(item, 10) for item in items]
    result = bag.map(myadd, b=10).compute()
    assert result == [myadd(item, b=10) for item in items]

    # A bag argument combined with a keyword constant.
    result = bag.map(myadd, other, c=10).compute()
    expected = [myadd(i, j, 10) for (i, j) in zip(items, other_items)]
    assert result == expected

    # The lazy reduction ``bag.sum()`` used as the second argument.
    total = sum(items)
    result = bag.map(myadd, bag.sum(), c=10).compute()
    assert result == [myadd(item, total, 10) for item in items]
Пример #14
0
def test_map_method():
    """Verify ``Bag.map`` results against equivalent eager computations.

    Each case maps ``myadd`` over a 100-element bag and compares the
    computed list with a locally built expected list.
    """
    left = db.from_sequence(range(100), npartitions=10)
    right = db.from_sequence(range(100, 200), npartitions=10)
    left_values = left.compute()
    right_values = right.compute()

    def myadd(a, b=2, c=3):
        return a + b + c

    pairs = list(zip(left_values, right_values))

    assert left.map(myadd).compute() == [myadd(i) for i in left_values]
    assert left.map(myadd, right).compute() == [myadd(i, j) for (i, j) in pairs]
    assert left.map(myadd, 10).compute() == [myadd(i, 10) for i in left_values]
    assert left.map(myadd, b=10).compute() == [
        myadd(i, b=10) for i in left_values
    ]

    with_bag_and_kwarg = left.map(myadd, right, c=10)
    assert with_bag_and_kwarg.compute() == [myadd(i, j, 10) for (i, j) in pairs]

    # ``left.sum()`` is passed lazily; the expected side uses the eager sum.
    eager_sum = sum(left_values)
    with_sum = left.map(myadd, left.sum(), c=10)
    assert with_sum.compute() == [myadd(i, eager_sum, 10) for i in left_values]
Пример #15
0
def test_from_castra():
    """Write a 100-row dataframe to castra and read it via ``db.from_castra``.

    Checks the default read, a single-column read, a read with
    ``index=True`` and a read with ``npartitions=4``. The test is skipped
    when ``castra``, ``pandas`` or ``dask.dataframe`` cannot be imported.
    """
    # Imported only so the test is skipped when castra is unavailable.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({'x': list(range(100)),
                       'y': [str(i) for i in range(100)]})
    a = dd.from_pandas(df, 10)

    c = a.to_castra()
    # Everything that uses ``c`` lives inside the ``try`` so that ``c.drop()``
    # still runs if one of the ``from_castra`` calls raises.
    try:
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)
        with_nparts = db.from_castra(c, 'x', npartitions=4)

        assert list(default) == list(zip(range(100), map(str, range(100))))
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
        assert with_nparts.npartitions == 4
        assert list(with_nparts) == list(range(100))
    finally:
        c.drop()
Пример #16
0
def test_from_castra():
    """Write a 100-row dataframe to castra and read it via ``db.from_castra``.

    Checks the default read, a single-column read, a read with
    ``index=True`` and a read with ``npartitions=4``. The test is skipped
    when ``castra``, ``pandas`` or ``dask.dataframe`` cannot be imported.
    """
    # Imported only so the test is skipped when castra is unavailable.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({
        'x': list(range(100)),
        'y': [str(i) for i in range(100)]
    })
    a = dd.from_pandas(df, 10)

    c = a.to_castra()
    # Everything that uses ``c`` lives inside the ``try`` so that ``c.drop()``
    # still runs if one of the ``from_castra`` calls raises.
    try:
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)
        with_nparts = db.from_castra(c, 'x', npartitions=4)

        assert list(default) == list(zip(range(100), map(str, range(100))))
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
        assert with_nparts.npartitions == 4
        assert list(with_nparts) == list(range(100))
    finally:
        c.drop()
Пример #17
0
def test_map():
    """Check that ``b.map(inc)`` matches the builtin ``map`` and names stably.

    Mapping the same function twice over ``b`` must yield bags with the same
    ``name``.
    """
    mapped = b.map(inc)
    result = mapped.compute()
    expected = [inc(item) for item in b.compute()]
    assert result == expected

    again = b.map(inc)
    assert mapped.name == again.name
Пример #18
0
def test_bz2_stream():
    """Check the first 100 items yielded by ``bz2_stream``.

    The input is the bz2-compressed, newline-joined decimal numbers
    0..9999; the first 100 items must be ``'0\\n'`` through ``'99\\n'``.
    """
    text = '\n'.join(str(number) for number in range(10000))
    compressed = bz2.compress(text.encode())

    head = list(take(100, bz2_stream(compressed)))
    expected = [str(number) + '\n' for number in range(100)]
    assert head == expected
Пример #19
0
def test_map_is_lazy():
    """Check that ``dask.bag.core.map`` returns an ``Iterator``, not a list."""
    # Aliased so the builtin ``map`` is not shadowed inside this test.
    from dask.bag.core import map as bag_map

    mapped = bag_map(lambda x: x, [1, 2, 3])
    assert isinstance(mapped, Iterator)
Пример #20
0
def test_map_is_lazy():
    """Verify the ``map`` exported by ``dask.bag.core`` yields an iterator."""
    import dask.bag.core as bag_core

    def identity(x):
        return x

    result = bag_core.map(identity, [1, 2, 3])
    assert isinstance(result, Iterator)
Пример #21
0
def test_map_keynames():
    """Verify ``Bag.map`` names its graph keys after the mapped function.

    Also checks that ``map`` and ``map_partitions`` of the same function
    produce different key sets.
    """
    bag = db.from_sequence([1, 2, 3])

    mapped_graph = dict(bag.map(inc).__dask_graph__())
    split_names = {dask.utils.key_split(key) for key in mapped_graph}
    assert 'inc' in split_names

    keys_from_map = set(bag.map(inc).__dask_graph__())
    keys_from_partitions = set(bag.map_partitions(inc).__dask_graph__())
    assert keys_from_map != keys_from_partitions
Пример #22
0
def test_bz2_stream():
    """Verify ``bz2_stream`` yields the leading lines of compressed text.

    Compresses the numbers 0..9999 joined by newlines and compares the
    first 100 streamed items with the first 100 numbers, each followed by a
    newline.
    """
    numbers = [str(i) for i in range(10000)]
    raw = '\n'.join(numbers).encode()
    compressed = bz2.compress(raw)

    stream = bz2_stream(compressed)
    first_hundred = list(take(100, stream))
    assert first_hundred == [numbers[i] + '\n' for i in range(100)]