示例#1
0
def test__read_text(e, s, a, b):
    """Exercise ``read_text`` on HDFS for each collection/lazy combination.

    Three small text files are written up front; the ``text.*.txt`` glob
    and ``other.txt`` are then read lazily and eagerly, both as a
    collection and as a plain list of per-file results.
    """
    fixtures = (
        ('/tmp/test/text.1.txt', 'Alice 100\nBob 200\nCharlie 300'),
        ('/tmp/test/text.2.txt', 'Dan 400\nEdith 500\nFrank 600'),
        ('/tmp/test/other.txt', 'a b\nc d'),
    )
    with make_hdfs() as hdfs:
        for path, text in fixtures:
            with hdfs.open(path, 'wb') as handle:
                handle.write(text.encode())

        # Lazy collection: after the read, ``s.tasks`` is expected to be empty.
        lazy_bag = read_text('/tmp/test/text.*.txt',
                             collection=True, lazy=True)
        yield gen.sleep(0.5)
        assert not s.tasks

        # Every line of the two matched files splits into two words.
        word_counts = lazy_bag.str.strip().str.split().map(len)
        counts = yield e.compute(word_counts)._result()
        assert counts == [2, 2, 2, 2, 2, 2]

        # Eager collection over the single remaining file.
        eager_bag = read_text('/tmp/test/other.txt',
                              collection=True, lazy=False)
        words = yield e.compute(eager_bag.str.split().concat())._result()
        assert words == ['a', 'b', 'c', 'd']

        # Without a collection, eager reads give Futures ...
        eager_parts = read_text('/tmp/test/text.*.txt',
                                collection=False, lazy=False)
        assert all(isinstance(part, Future) for part in eager_parts)

        # ... and lazy reads give Values.
        lazy_parts = read_text('/tmp/test/text.*.txt',
                               collection=False, lazy=True)
        assert all(isinstance(part, Value) for part in lazy_parts)
示例#2
0
def test__read_text(e, s, a, b):
    """Check ``read_text`` over HDFS with every collection/lazy pairing.

    Writes two ``text.N.txt`` files plus ``other.txt``, then verifies the
    lazy and eager reads, first as collections and then as plain lists.
    """
    pattern = '/tmp/test/text.*.txt'
    contents = [
        ('/tmp/test/text.1.txt', 'Alice 100\nBob 200\nCharlie 300'),
        ('/tmp/test/text.2.txt', 'Dan 400\nEdith 500\nFrank 600'),
        ('/tmp/test/other.txt', 'a b\nc d'),
    ]
    with make_hdfs() as hdfs:
        for filename, payload in contents:
            with hdfs.open(filename, 'wb') as out:
                out.write(payload.encode())

        # Lazy read: ``s.tasks`` should still be empty half a second later.
        lines = read_text(pattern, collection=True, lazy=True)
        yield gen.sleep(0.5)
        assert not s.tasks

        tokens_per_line = lines.str.strip().str.split().map(len)
        pending = e.compute(tokens_per_line)
        lengths = yield pending._result()
        assert lengths == [2, 2, 2, 2, 2, 2]

        # Eager read of the one file the glob does not match.
        other = read_text('/tmp/test/other.txt', collection=True, lazy=False)
        pending = e.compute(other.str.split().concat())
        flattened = yield pending._result()
        assert flattened == ['a', 'b', 'c', 'd']

        # Non-collection reads: Futures when eager, Values when lazy.
        eager = read_text(pattern, collection=False, lazy=False)
        assert all(isinstance(item, Future) for item in eager)

        deferred = read_text(pattern, collection=False, lazy=True)
        assert all(isinstance(item, Value) for item in deferred)
示例#3
0
def test_read_text_sync(loop):
    """Read an HDFS text file eagerly while a blocking Executor is active.

    A two-line file is written, a three-worker cluster is started, and the
    upper-cased lines of the resulting collection are compared.
    """
    with make_hdfs() as hdfs:
        with hdfs.open('/tmp/test/data.txt', 'wb') as handle:
            handle.write(b'hello\nworld')

        with cluster(nworkers=3) as (sched, [_w1, _w2, _w3]):
            address = ('127.0.0.1', sched['port'])
            with Executor(address, loop=loop) as executor:
                lines = read_text('/tmp/test/*.txt', lazy=False)
                shouted = list(lines.str.upper())
                assert shouted == ['HELLO', 'WORLD']
示例#4
0
def test_read_text_sync(loop):
    """Eagerly read ``/tmp/test/*.txt`` from HDFS under a synchronous Executor.

    The file holds ``hello`` and ``world`` on separate lines; iterating the
    upper-cased collection must give both words back in order.
    """
    with make_hdfs() as hdfs:
        with hdfs.open('/tmp/test/data.txt', 'wb') as out:
            out.write(b'hello\nworld')

        with cluster(nworkers=3) as (scheduler_info, [_first, _second, _third]):
            port = scheduler_info['port']
            with Executor(('127.0.0.1', port), loop=loop) as client:
                text = read_text('/tmp/test/*.txt', lazy=False)
                upper_lines = list(text.str.upper())
                assert upper_lines == ['HELLO', 'WORLD']
示例#5
0
def test__read_text_json_endline(e, s, a):
    """Parse each line of a newline-terminated JSON-lines file from HDFS.

    The file ends with a trailing newline; mapping ``json.loads`` over the
    lines read back must produce exactly the two written records.
    """
    import json

    path = '/tmp/test/text.1.txt'
    with make_hdfs() as hdfs:
        with hdfs.open(path, 'wb') as handle:
            handle.write(b'{"x": 1}\n{"x": 2}\n')

        lines = read_text(path)
        records = lines.map(json.loads)
        pending = e.compute(records)
        parsed = yield pending._result()

        assert parsed == [{"x": 1}, {"x": 2}]
示例#6
0
def test__read_text_json_endline(e, s, a):
    """Read a JSON-lines file that ends in a newline and decode every line.

    Two one-key objects are written, each followed by ``\\n``; the computed
    result of mapping ``json.loads`` over the file must equal those objects.
    """
    import json

    source = '/tmp/test/text.1.txt'
    expected = [{"x": 1}, {"x": 2}]
    with make_hdfs() as hdfs:
        with hdfs.open(source, 'wb') as out:
            out.write(b'{"x": 1}\n{"x": 2}\n')

        decoded = read_text(source).map(json.loads)
        outcome = yield e.compute(decoded)._result()

        assert outcome == expected
示例#7
0
def test__read_text_unicode(e, s, a, b):
    """Check that UTF-8 bytes written to HDFS come back as decoded text.

    The same six-byte string (four ASCII letters plus one two-byte
    character) is written on two lines and read back eagerly as a list.
    """
    fn = '/tmp/test/data.txt'
    data = b'abcd\xc3\xa9'
    expected = data.decode('utf-8')
    with make_hdfs() as hdfs:
        with hdfs.open(fn, 'wb') as handle:
            handle.write(b'\n'.join([data, data]))

        futures = read_text(fn, collection=False, lazy=False)
        lines = yield futures[0]._result()
        assert len(lines) == 2

        stripped = [unicode.strip(line) for line in lines]
        assert stripped == [expected, expected]

        # Five characters, not six bytes: ``\xc3\xa9`` decodes to one character.
        assert len(lines[0]) == 5
示例#8
0
def test__read_text_unicode(e, s, a, b):
    """Round-trip non-ASCII UTF-8 text through HDFS and ``read_text``.

    Two identical lines containing a two-byte UTF-8 character are written;
    the first per-file result is awaited and compared to the decoded text.
    """
    fn = '/tmp/test/data.txt'
    data = b'abcd\xc3\xa9'
    with make_hdfs() as hdfs:
        with hdfs.open(fn, 'wb') as out:
            out.write(b'\n'.join([data] * 2))

        parts = read_text(fn, collection=False, lazy=False)
        first_part = parts[0]
        result = yield first_part._result()

        assert len(result) == 2
        cleaned = list(map(unicode.strip, result))
        text = data.decode('utf-8')
        assert cleaned == [text] * 2
        # The decoded line has five characters although it was six bytes.
        assert len(result[0]) == 5