Example #1
def test_from_filenames():
    with filetexts({'a1.log': 'A\nB', 'a2.log': 'C\nD'}) as fns:
        assert set(line.strip() for line in db.from_filenames(fns)) == \
                set('ABCD')
        assert set(line.strip() for line in db.from_filenames('a*.log')) == \
                set('ABCD')

    assert raises(ValueError, lambda: db.from_filenames('non-existent-*-path'))
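This test relies on a `filetexts` context manager and a `raises` helper from dask's test utilities; `raises` checks whether the lambda raises the given exception. A minimal sketch of a comparable `filetexts`, written as an assumption about its behavior rather than dask's actual implementation:

import os
from contextlib import contextmanager

@contextmanager
def filetexts(d):
    # Assumed behavior: write each filename -> text pair to disk, yield the
    # filenames, and remove the files again when the with-block exits.
    for fn, text in d.items():
        with open(fn, 'w') as f:
            f.write(text)
    try:
        yield list(d)
    finally:
        for fn in d:
            if os.path.exists(fn):
                os.remove(fn)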
Example #2
def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = gzip.open(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == list(c)
Example #3
def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = gzip.open(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == [s.decode() for s in c]
Example #4
def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = GzipFile(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100, linesep='\n')
        c = db.from_filenames(fn, linesep='\n')
        assert len(b.dask) > 5
        assert list(b) == list(c)
Example #5
def test_from_filenames_large():
    with tmpfile() as fn:
        with open(fn, 'w') as f:
            f.write('Hello, world!\n' * 100)
        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == list(c)

        d = db.from_filenames([fn], chunkbytes=100)
        assert list(b) == list(d)
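A quick sanity check on the `len(b.dask) > 5` assertion, assuming `chunkbytes=100` splits the file into roughly 100-byte ranges with one task per range:

payload = b'Hello, world!\n' * 100
print(len(payload))         # 1400 bytes in total
print(len(payload) // 100)  # ~14 chunks of 100 bytes, comfortably more than 5 tasks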
Example #6
def test_from_filenames_large():
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write(('Hello, world!' + os.linesep).encode() * 100)
        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(map(str, b)) == list(map(str, c))

        d = db.from_filenames([fn], chunkbytes=100)
        assert list(b) == list(d)
Example #7
def test_from_filenames_encoding():
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write((u'你好!' + os.linesep).encode('gb18030') * 100)
        b = db.from_filenames(fn, chunkbytes=100, encoding='gb18030')
        c = db.from_filenames(fn, encoding='gb18030')
        assert len(b.dask) > 5
        assert list(map(lambda x: x.encode('utf-8'), b)) == list(map(lambda x: x.encode('utf-8'), c))

        d = db.from_filenames([fn], chunkbytes=100, encoding='gb18030')
        assert list(b) == list(d)
Example #8
def test_gh715():
    bin_data = u'\u20ac'.encode('utf-8')
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write(bin_data)
        a = db.from_filenames(fn)
        assert a.compute()[0] == bin_data.decode('utf-8')
Example #9
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) == set([
        (list, (bz2.BZ2File, os.path.abspath('foo.json.bz2'))),
        (list, (bz2.BZ2File, os.path.abspath('bar.json.bz2')))
    ]))
Example #10
def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])

    assert (set(b.dask.values()) == set([
        (list, (gzip.open, os.path.abspath('foo.json.gz'))),
        (list, (gzip.open, os.path.abspath('bar.json.gz')))
    ]))
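In the two graph tests above, each value of `b.dask` is a dask task: a tuple whose first element is a callable and whose remaining elements are its (possibly nested) arguments. A minimal sketch of how such a tuple would be evaluated, independent of dask's scheduler:

def execute_task(task):
    # A task is (func, *args); arguments may themselves be tasks.
    if isinstance(task, tuple) and task and callable(task[0]):
        func, args = task[0], task[1:]
        return func(*(execute_task(arg) for arg in args))
    return task

# For example, (list, (gzip.open, '/abs/path/foo.json.gz')) opens the gzip
# file and materializes its (byte) lines only when the task is executed.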
Example #11
def analyze(path, parse_timestamps=True, **kwargs):
    """
    Analyze a given directory of either .json or flat text files
    with delimited JSON to get relevant key statistics.

    Parameters
    ----------
    path: string
        Path to directory
    parse_timestamps: boolean, default True
        If True, will attempt to regex-match ISO 8601 formatted timestamps
    kwargs:
        passed into json.loads. Here you can specify encoding, etc.
    """

    stats = {}

    start_time = time.time()
    file_list = [os.path.join(path, f) for f in os.listdir(path)]
    bag = db.from_filenames(file_list).map(json.loads)
    recur_partial = partial(recur_dict, parse_timestamps=parse_timestamps)
    stats = bag.fold(recur_partial, combine_stats, initial={}).compute()
    count = stats["total_records"]
    del stats["total_records"]

    elapsed = time.time() - start_time
    print('Malort run finished: {} JSON blobs analyzed in {} seconds.'
          .format(count, elapsed))
    return MalortResult(stats, count, elapsed)
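A hypothetical call to `analyze`; the directory path is a placeholder, not taken from the original source:

# '/data/events' stands in for a directory of newline-delimited JSON files.
result = analyze('/data/events', parse_timestamps=False)
# `result` is a MalortResult built from the aggregated key stats, the record
# count, and the elapsed wall-clock time, as constructed above.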
Example #12
def analyze(path, parse_timestamps=True, **kwargs):
    """
    Analyze a given directory of either .json or flat text files
    with delimited JSON to get relevant key statistics.

    Parameters
    ----------
    path: string
        Path to directory
    parse_timestamps: boolean, default True
        If True, will attempt to regex-match ISO 8601 formatted timestamps
    kwargs:
        passed into json.loads. Here you can specify encoding, etc.
    """

    stats = {}

    start_time = time.time()
    file_list = [os.path.join(path, f) for f in os.listdir(path)]
    bag = db.from_filenames(file_list).map(json.loads)
    recur_partial = partial(recur_dict, parse_timestamps=parse_timestamps)
    stats = bag.fold(recur_partial, combine_stats, initial={}).compute()
    count = stats["total_records"]
    del stats["total_records"]

    elapsed = time.time() - start_time
    print('Malort run finished: {} JSON blobs analyzed in {} seconds.'.format(
        count, elapsed))
    return MalortResult(stats, count, elapsed)
Example #13
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) == set([
        (list, (decode_sequence, system_encoding,
                (bz2.BZ2File, os.path.abspath('foo.json.bz2'), 'rb'))),
        (list, (decode_sequence, system_encoding,
                (bz2.BZ2File, os.path.abspath('bar.json.bz2'), 'rb')))
    ]))
Example #14
def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])

    assert (set(b.dask.values()) == set([
        (list, (decode_sequence, system_encoding,
                (gzip.open, os.path.abspath('foo.json.gz'), 'rb'))),
        (list, (decode_sequence, system_encoding,
                (gzip.open, os.path.abspath('bar.json.gz'), 'rb')))
    ]))
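The two graph tests above additionally route the raw byte lines through `decode_sequence` with the process's `system_encoding`. A minimal sketch of what such a helper is assumed to do (an assumption about its behavior, not dask's actual implementation):

def decode_sequence(encoding, seq):
    # Assumed behavior: lazily decode each bytes item of `seq` to str.
    for item in seq:
        yield item.decode(encoding)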
Example #15
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) == set([
        (list, (io.TextIOWrapper, (io.BufferedReader,
                                   (open, os.path.abspath('foo.json.bz2'),
                                    'rb', 'bz2')), system_encoding, None,
                os.linesep)),
        (list, (io.TextIOWrapper, (io.BufferedReader,
                                   (open, os.path.abspath('bar.json.bz2'),
                                    'rb', 'bz2')), system_encoding, None,
                os.linesep))
    ]))
Example #16
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) ==
            set([(list,
                  (io.TextIOWrapper,
                   (io.BufferedReader,
                    (open, os.path.abspath('foo.json.bz2'), 'rb', 'bz2')),
                   system_encoding, None, os.linesep)),
                 (list,
                  (io.TextIOWrapper,
                   (io.BufferedReader,
                    (open, os.path.abspath('bar.json.bz2'), 'rb', 'bz2')),
                   system_encoding, None, os.linesep))]))
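In the two graph tests above, the tasks instead build an `io.BufferedReader` over the raw stream and wrap it in `io.TextIOWrapper(buffer, encoding, errors, newline)`, so iterating a partition yields decoded text lines. A standalone sketch of the same decoding pattern on an ordinary file (illustration only; the four-argument `open` in the tasks is presumably a compression-aware opener from the library, not the builtin):

import io
import os

def decoded_lines(path, encoding):
    raw = open(path, 'rb', buffering=0)    # unbuffered binary stream
    buffered = io.BufferedReader(raw)
    # TextIOWrapper(buffer, encoding, errors, newline) decodes on the fly.
    return list(io.TextIOWrapper(buffered, encoding, None, os.linesep))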
Example #17
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) ==
            set([(list, (bz2.BZ2File, os.path.abspath('foo.json.bz2'))),
                 (list, (bz2.BZ2File, os.path.abspath('bar.json.bz2')))]))
Example #18
def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])

    assert (set(b.dask.values()) ==
            set([(list, (gzip.open, os.path.abspath('foo.json.gz'))),
                 (list, (gzip.open, os.path.abspath('bar.json.gz')))]))
Example #19
def test_from_filenames():
    with filetexts({'a1.log': 'A\nB', 'a2.log': 'C\nD'}) as fns:
        assert set(line.strip() for line in db.from_filenames(fns)) == \
                set('ABCD')
        assert set(line.strip() for line in db.from_filenames('a*.log')) == \
                set('ABCD')
Example #20
def bag_to_iterator(x, **kwargs):
    return db.from_filenames([tf.path for tf in x])
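`bag_to_iterator` assumes `x` is a sequence of objects exposing a `.path` attribute (named `tf` in the source, presumably temporary-file wrappers). A hypothetical call with a stand-in type:

from collections import namedtuple

# Hypothetical stand-in for the real temp-file objects used by the caller.
TempFile = namedtuple('TempFile', ['path'])
bag = bag_to_iterator([TempFile('part-0.json'), TempFile('part-1.json')])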
Example #21
def of_files(self, filenames, chunkbytes=None):
    return Dream(*bag.from_filenames(filenames, chunkbytes)._args)
Example #22
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])

    assert (set(b.dask.values()) ==
            set([(list, (decode_sequence, system_encoding, (bz2.BZ2File, os.path.abspath('foo.json.bz2'), 'rb'))),
                 (list, (decode_sequence, system_encoding, (bz2.BZ2File, os.path.abspath('bar.json.bz2'), 'rb')))]))
Example #23
def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])

    assert (set(b.dask.values()) ==
            set([(list, (decode_sequence, system_encoding, (gzip.open, os.path.abspath('foo.json.gz'), 'rb'))),
                 (list, (decode_sequence, system_encoding, (gzip.open, os.path.abspath('bar.json.gz'), 'rb')))]))