Example #1
def __init__(self, default_connections, keyed_connections, fields,
             filename, protocol):
    super(ToPickleConnection, self).__init__(default_connections,
                                             keyed_connections, fields)
    # open the target file in binary mode and immediately pickle the
    # header fields, so readers can recover the field names first
    self.file = open(filename, 'wb')
    self.protocol = protocol
    pickle.dump(fields, self.file, self.protocol)
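
All of the examples on this page rely on the same property of the pickle format: successive pickle.dump calls append self-delimiting records to a single file, and repeated pickle.load calls read them back in the same order. A minimal, self-contained sketch (the file name rows.p is illustrative):

import pickle

# write several objects back to back into one file
with open('rows.p', 'wb') as f:
    pickle.dump(('foo', 'bar'), f)  # header row
    pickle.dump(('a', 1), f)        # data rows
    pickle.dump(('b', 2), f)

# each pickle.load consumes exactly one record, in write order
with open('rows.p', 'rb') as f:
    print(pickle.load(f))  # ('foo', 'bar')
    print(pickle.load(f))  # ('a', 1)
    print(pickle.load(f))  # ('b', 2)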
Example #2
def _writepickle(table, source, mode, protocol, write_header):
    # resolve the source argument into a writable source object
    source = write_source_from_arg(source, mode)
    with source.open(mode) as f:
        it = iter(table)
        # the first row of a petl table is the header
        hdr = next(it)
        if write_header:
            pickle.dump(hdr, f, protocol)
        for row in it:
            pickle.dump(row, f, protocol)
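
The reading side of this format is the mirror image: call pickle.load in a loop until the stream is exhausted, at which point pickle raises EOFError. A sketch of such a reader (iter_pickled_rows is a hypothetical helper, not part of petl, but this is roughly what frompickle has to do under the hood):

import pickle

def iter_pickled_rows(path):
    # yield one object per pickle.dump call; pickle.load raises
    # EOFError at end of stream, which terminates the generator
    with open(path, 'rb') as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                return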
Example #3
import pickle
from tempfile import NamedTemporaryFile

from petl import frompickle
from petl.test.helpers import ieq


def test_frompickle():
    # NamedTemporaryFile defaults to mode 'w+b', so pickle.dump can
    # write to it directly; delete=False keeps the file on disk for
    # frompickle to reopen by name
    f = NamedTemporaryFile(delete=False)
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    actual = frompickle(f.name)
    ieq(table, actual)
    ieq(table, actual)  # verify can iterate twice
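
Note that frompickle returns a lazy table view rather than a list: each pass over the view reopens the file and replays the pickled rows, which is why the test can call ieq twice on the same object.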
Example #4
def __iter__(self):
    protocol = self.protocol
    source = write_source_from_arg(self.source)
    with source.open('wb') as f:
        it = iter(self.table)
        hdr = next(it)
        if self.write_header:
            pickle.dump(hdr, f, protocol)
        # write and yield each row, so the view is a pass-through
        # and the file is populated lazily, during iteration
        yield tuple(hdr)
        for row in it:
            pickle.dump(row, f, protocol)
            yield tuple(row)
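
This __iter__ makes the write a side effect of iteration: nothing touches the file until a consumer starts pulling rows, and each row is both pickled and passed through. Assuming this is the view behind petl's teepickle, usage would look something like the following (the file name copy.p is illustrative):

import petl as etl

t1 = [('foo', 'bar'), ('a', 1), ('b', 2)]
# teepickle returns a lazy view; the file is only written when
# something iterates over t2
t2 = etl.teepickle(t1, 'copy.p')
etl.nrows(t2)  # forces iteration, writing copy.p as a side effect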
Example #5
def accept(self, row):
    row = tuple(row)
    if len(self.cache) < self.buffersize:
        self.cache.append(row)
    else:
        # buffer full: sort and dump the chunk to a temporary file
        self.cache.sort(key=self.getkey, reverse=self.reverse)
        f = NamedTemporaryFile()  # TODO need not be named
        for r in self.cache:
            # protocol=-1 selects the highest available pickle protocol
            pickle.dump(r, f, protocol=-1)
        f.flush()
        f.seek(0)
        self.chunkfiles.append(f)
        # start a fresh buffer with the current row
        self.cache = [row]
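
Once accept has filled chunkfiles with sorted runs, the runs still have to be merged into one ordered stream. A sketch of that step (iterchunk and mergesorted are hypothetical names mirroring petl's private _iterchunk and _mergesorted helpers):

import heapq
import pickle

def iterchunk(f):
    # each chunk file contains one pickled row per dump, already sorted
    f.seek(0)
    try:
        while True:
            yield pickle.load(f)
    except EOFError:
        pass

def mergesorted(getkey, reverse, *chunkfiles):
    # heapq.merge lazily interleaves the sorted runs into one
    # globally sorted stream
    return heapq.merge(*(iterchunk(f) for f in chunkfiles),
                       key=getkey, reverse=reverse)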
Example #6
    def _iternocache(self, source, key, reverse):
        debug('iterate without cache')
        self.clearcache()
        it = iter(source)

        hdr = next(it)
        yield tuple(hdr)

        if key is not None:
            # convert field selection into field indices
            indices = asindices(hdr, key)
        else:
            indices = range(len(hdr))
        # now use field indices to construct a _getkey function
        # TODO check if this raises an exception on short rows
        getkey = comparable_itemgetter(*indices)

        # TODO support native comparison

        # initialise the first chunk
        rows = list(itertools.islice(it, 0, self.buffersize))
        rows.sort(key=getkey, reverse=reverse)

        # have we exhausted the source iterator?
        if self.buffersize is None or len(rows) < self.buffersize:
            # yes, table fits within sort buffer

            if self.cache:
                debug('caching mem')
                self._hdrcache = hdr
                self._memcache = rows
                # actually not needed to iterate from memcache
                self._getkey = getkey

            for row in rows:
                yield tuple(row)

        else:
            # no, table is too big, need to sort in chunks

            chunkfiles = []

            while rows:

                # dump the chunk
                with NamedTemporaryFile(dir=self.tempdir, delete=False,
                                        mode='wb') as f:
                    # N.B., we **don't** want the file to be deleted on close,
                    # but we **do** want the file to be deleted when self
                    # is garbage collected, or when the program exits. When
                    # all references to the wrapper are gone, the file should
                    # get deleted.
                    wrapper = _NamedTempFileDeleteOnGC(f.name)
                    debug('created temporary chunk file %s' % f.name)
                    for row in rows:
                        pickle.dump(row, f, protocol=-1)
                    f.flush()
                    chunkfiles.append(wrapper)

                # grab the next chunk
                rows = list(itertools.islice(it, 0, self.buffersize))
                rows.sort(key=getkey, reverse=reverse)

            if self.cache:
                debug('caching files')
                self._hdrcache = hdr
                self._filecache = chunkfiles
                self._getkey = getkey

            chunkiters = [_iterchunk(f.name) for f in chunkfiles]
            for row in _mergesorted(getkey, reverse, *chunkiters):
                yield tuple(row)
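
_NamedTempFileDeleteOnGC itself is not shown on this page. One plausible reconstruction ties the chunk file's lifetime to the wrapper object with weakref.finalize, which also runs at interpreter exit (a sketch under that assumption, not petl's actual implementation):

import os
import weakref

class _NamedTempFileDeleteOnGC(object):
    def __init__(self, name):
        self.name = name
        # remove the file when the wrapper is garbage collected or
        # when the interpreter exits, whichever comes first
        self._finalizer = weakref.finalize(self, self._cleanup, name)

    @staticmethod
    def _cleanup(name):
        try:
            os.unlink(name)
        except OSError:
            pass  # file already gone; nothing to do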
Example #7
def accept(self, row):
    pickle.dump(row, self.file, self.protocol)
    # forward rows on the default pipe (behave like tee)
    self.broadcast(row)
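
Here the connection behaves like Unix tee: every row that arrives is persisted to the pickle file and then broadcast unchanged to downstream connections, so adding the pickle sink does not disturb the rest of the pipeline.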