Python Dampr.memory примеры использования

Язык программирования: Python

Пространство имен/Пакет: dampr

Класс/Тип: Dampr

Метод/Функция: memory

Примеров на hotexamples.com: 13

Python Dampr.memory - 13 примеров найдено. Это лучшие примеры Python кода для dampr.Dampr.memory, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

memory(13)

text(5)

read_input(3)

run(2)

Основные методы

memory (13)

text (5)

read_input (3)

run (2)

Пример #1

Показать файл

Файл: test_dampr.py Проект: wroldwiedbwe/Dampr

    def _test_concat(self):
        """
        Tests concatenating K datasets into a new Dampr.

        NOTE(review): the method name does not start with ``test``, so the
        default unittest loader will not collect it -- confirm whether it is
        disabled on purpose.
        """

        word1 = Dampr.memory("abcdefg")
        # Run the pipeline returned by concat(); running `word1` itself would
        # only cover the first dataset.
        # NOTE(review): assumes concat() returns the combined Dampr, as the
        # docstring above states -- confirm against the Dampr API.
        combined = word1.concat(Dampr.memory("hijklmn"))

        results = sorted(combined.run())
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(results, list('abcdefghijklmn'))

Пример #2

Показать файл

    def intersect(self, keys, min_match=None):
        """
        Reads the lines whose offset has at least `min_match` rows in the
        key index matching any of `keys`.

        keys: a single key, or a list/tuple of keys.  Each key is rendered
            to text before it is compared with the `key` column.
        min_match: minimum number of matching index rows an offset needs.
            None (the default) means len(keys); a float is treated as a
            fraction of len(keys) and truncated to an int.

        Returns the Dampr pipeline built from the paths given by
        read_paths(self.path, self.suffix); for each path the index is
        queried and the line starting at every matching offset is yielded
        in ascending offset order.
        """
        if not isinstance(keys, (list, tuple)):
            keys = [keys]

        if min_match is None:
            min_match = len(keys)

        if isinstance(min_match, float):
            min_match = int(min_match * len(keys))

        paths = read_paths(self.path, self.suffix)

        # Bind the keys as parameters instead of splicing them into the SQL
        # text.  Double-quoted tokens are identifiers first in SQLite, so a
        # key such as `key` or `offset` would resolve to a column, and a key
        # containing a double quote would break (or alter) the statement.
        # NOTE(review): the number of bound variables is capped by SQLite
        # (999 on older builds) -- confirm callers never pass more keys.
        placeholders = u','.join(u'?' for _ in keys)
        query = u"""
            select offset from
            (select offset, count(*) as c
                from key_index
                where key in ({})
                group by offset) where c >= ?
            order by offset asc""".format(placeholders)

        # Keys are still compared by their text form, exactly as when they
        # were formatted into the statement.
        params = [u'{}'.format(key) for key in keys]
        params.append(int(min_match))

        def read_db(fname):
            db = self.open_db(fname)

            cur = db.cursor()
            cur.execute(query, params)
            with codecs.open(fname, encoding='utf-8') as f:
                for (offset,) in cur:
                    # Offsets come from the index; jump there and emit the
                    # single line that starts at that position.
                    f.seek(offset)
                    yield f.readline()

        return Dampr.memory(paths).flat_map(read_db)

Пример #3

Показать файл

    def test_len(self):
        """
        Tests the number of items in a collection.

        Checks both the `self.items` fixture and an empty in-memory
        collection.
        """

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.items.len().read(), [10])
        self.assertEqual(Dampr.memory([]).len().read(), [0])

Пример #4

Показать файл

    def test_stream_blocks(self):
        """
        Tests stream blocks

        Counts the letters of one word, then keeps the two most frequent
        letters using partition_map / partition_reduce.
        """
        import heapq

        def map_topk(it):
            # Keep only the two largest (count, symbol) pairs seen in `it`.
            heap = []
            for symbol, count in it:
                heapq.heappush(heap, (count, symbol))
                if len(heap) > 2:
                    heapq.heappop(heap)

            # Every survivor is emitted under the same key, 1.
            return ((1, x) for x in heap)

        def reduce_topk(it):
            # Flatten all value iterators; the key itself is not needed.
            counts = (v for _key, vit in it for v in vit)
            for count, symbol in heapq.nlargest(2, counts):
                yield symbol, count

        word = Dampr.memory(["supercalifragilisticexpialidociousa"])
        letter_counts = word.flat_map(lambda w: list(w)).count()

        topk = letter_counts \
                .partition_map(map_topk) \
                .partition_reduce(reduce_topk)

        results = sorted(topk.run())
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(results, [('a', 4), ('i', 7)])

Пример #5

Показать файл

 def test_disjoint(self):
     """
     Tests that joining two groupings produces no values here.

     `self.items` is grouped by each value itself, while 0..9 is grouped
     by the negated value; the joined output is expected to be empty.
     """
     items2 = Dampr.memory(list(range(10))) \
             .group_by(lambda x: -x)
     output = self.items.group_by(lambda x: x) \
             .join(items2) \
             .run()
     output = [v for k, v in output]
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual([], output)

Пример #6

Показать файл

    def test_repartition(self):
        """
        Tests joining against a dataset that was grouped and then reduced.

        0..9 is grouped by the negated value and each group is summed
        before being joined with `self.items` grouped by value; the joined
        output is expected to be empty.
        """
        items2 = Dampr.memory(list(range(10))) \
                .group_by(lambda x: -x) \
                    .reduce(lambda k, vs: sum(vs))

        output = self.items.group_by(lambda x: x) \
                .join(items2) \
                .run()

        output = [v for k, v in output]
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual([], output)

Пример #7

Показать файл

    def test_reduce_join(self):
        """
        Tests reducing both sides of a join into one sorted list per key.

        `self.items` and 0..9 are both grouped by parity (x % 2); the reducer
        chains the left and right values of each key and sorts them.
        """
        items2 = Dampr.memory(list(range(10)))
        res = self.items \
                .group_by(lambda x: x % 2) \
                .join(items2.group_by(lambda x: x % 2)) \
                    .reduce(lambda l, r: sorted(itertools.chain(l, r))) \
                .run()

        output = list(res)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual((0, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]), output[0])
        self.assertEqual((1, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]), output[1])

Пример #8

Показать файл

Файл: test_dampr.py Проект: wroldwiedbwe/Dampr

    def test_top_k(self):
        """
        Tests getting the top k items

        Counts the letters of one word and asks for the five entries with
        the largest count (second element of each pair).
        """

        word = Dampr.memory(["supercalifragilisticexpialidociousa"])
        topk = word.flat_map(lambda w: list(w)).count() \
                .topk(5, lambda x: x[1])

        # Sort so the comparison does not depend on output order.
        results = sorted(topk.run())
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(results, [('a', 4), ('c', 3), ('i', 7), ('l', 3),
                                   ('s', 3)])

Пример #9

Показать файл

    def test_left_join(self):
        """
        Tests using a left join to drop items that have a match on the right.

        10..12 is joined against `self.items`; entries whose right-hand list
        is empty are kept, and the expected result is 13..19 in order.
        """
        to_remove = Dampr.memory(list(range(10, 13)))

        # After left_reduce, llrs[1] is the (left list, right list) pair
        # built by the reducer lambda below.
        output = self.items.group_by(lambda x: x) \
                .join(to_remove.group_by(lambda x: x)) \
                    .left_reduce(lambda l, r: (list(l), list(r))) \
                .filter(lambda llrs: len(llrs[1][1]) == 0) \
                .map(lambda llrs: llrs[1][0][0]) \
                .sort_by(lambda x: x) \
                .run()

        output = list(output)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(list(range(13, 20)), output)

Пример #10

Показать файл

    def test_blocks(self):
        """
        Tests Custom Blocks

        Counts the letters of one word, then keeps the two most frequent
        letters with a custom BlockMapper / BlockReducer pair.
        """
        import heapq

        class TopKMapper(BlockMapper):
            """Keeps the `k` largest (count, letter) pairs it is given."""

            def __init__(self, k):
                self.k = k

            def start(self):
                self.heap = []

            def add(self, _k, lc):
                # lc is indexed as (letter, count); store it count-first so
                # the heap orders by count.
                heapq.heappush(self.heap, (lc[1], lc[0]))
                if len(self.heap) > self.k:
                    heapq.heappop(self.heap)

                # Nothing is emitted per record; output happens in finish().
                return iter([])

            def finish(self):
                # Every survivor is emitted under the same key, 1.
                for cl in self.heap:
                    yield 1, cl

        class TopKReducer(BlockReducer):
            """Emits the `k` largest (count, letter) pairs of each group."""

            def __init__(self, k):
                self.k = k

            def start(self):
                pass

            def add(self, k, it):
                for count, letter in heapq.nlargest(self.k, it):
                    yield letter, (letter, count)

        word = Dampr.memory(["supercalifragilisticexpialidociousa"])
        letter_counts = word.flat_map(lambda w: list(w)).count()

        topk = letter_counts \
                .custom_mapper(TopKMapper(2)) \
                .custom_reducer(TopKReducer(2))

        results = sorted(topk.run())
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(results, [('a', 4), ('i', 7)])

Пример #11

Показать файл

    def union(self, keys):
        """
        Reads the lines whose offset is indexed under any of `keys`.

        keys: a single key, or a list/tuple of keys.  Each key is rendered
            to text before it is compared with the `key` column.

        Returns the Dampr pipeline built from the paths given by
        read_paths(self.path, self.suffix); for each path the index is
        queried and the line starting at every distinct matching offset is
        yielded in ascending offset order.
        """
        if not isinstance(keys, (list, tuple)):
            keys = [keys]

        paths = read_paths(self.path, self.suffix)

        # Bind the keys as parameters instead of splicing them into the SQL
        # text.  Double-quoted tokens are identifiers first in SQLite, so a
        # key such as `key` or `offset` would resolve to a column, and a key
        # containing a double quote would break (or alter) the statement.
        # NOTE(review): the number of bound variables is capped by SQLite
        # (999 on older builds) -- confirm callers never pass more keys.
        placeholders = u','.join(u'?' for _ in keys)
        query = u"""select distinct offset from key_index
            where key in ({}) order by offset asc""".format(placeholders)

        # Unicode templates keep non-ASCII keys working the same way on
        # Python 2 and 3.
        params = [u'{}'.format(key) for key in keys]

        def read_db(fname):
            db = self.open_db(fname)

            cur = db.cursor()
            cur.execute(query, params)
            with codecs.open(fname, encoding='utf-8') as f:
                for (offset,) in cur:
                    # Offsets come from the index; jump there and emit the
                    # single line that starts at that position.
                    f.seek(offset)
                    yield f.readline()

        return Dampr.memory(paths).flat_map(read_db)

Пример #12

Показать файл

    def build(self, key_f, force=False):
        """
        Builds the key index for the files found under self.path.

        key_f: callable applied to every line; the result is iterated and
            each element is stored as a key for that line.
        force: when true, a file is indexed even if self.exists(fname) is
            true for it.

        Returns whatever `.read(name="indexing")` yields for the folded
        per-file row counts.
        """
        paths = sorted(read_paths(self.path, False))

        def index_file(fname):
            # Writes one (key, offset) row per key of every line in `fname`
            # and returns the number of rows in key_index afterwards.
            logging.debug("Indexing %s", fname)
            db = self.create_db(fname)

            def key_offsets():
                # `position` is the line's start, measured in UTF-8 bytes.
                position = 0
                with codecs.open(fname, encoding='utf-8') as lines:
                    # readline() returns an empty string at end of file.
                    for line in iter(lines.readline, u''):
                        for key in key_f(line):
                            yield key, position

                        position += len(line.encode('utf-8'))

            cursor = db.cursor()
            cursor.executemany("INSERT INTO key_index values (?, ?)",
                               key_offsets())
            db.commit()
            cursor.execute("create index key_idx on key_index (key)")
            db.commit()
            cursor.execute("select count(*) from key_index")
            total = cursor.fetchone()[0]
            logging.debug("Keys indexed for %s: %s", fname, total)

            return total

        def needs_index(fname):
            # Skip files that already have an index unless forced.
            return force or not self.exists(fname)

        per_file = Dampr.memory(paths).filter(needs_index).map(index_file)
        # All counts share the key 1, so they fold into a single sum.
        summed = per_file.fold_by(key=lambda x: 1, binop=lambda x, y: x + y)
        return summed.read(name="indexing")
Пример #13

Показать файл

 def setUp(self):
     """
     Creates the `self.items` fixture: the integers 10..19 held in an
     in-memory Dampr created with partitions=2.
     """
     self.items = Dampr.memory(list(range(10, 20)), partitions=2)