def __init__(self, default_connections, keyed_connections, fields, filename,
             protocol):
    """Set up the connection, open *filename* for binary writing, and
    pickle the header *fields* as the first record in the file."""
    super(ToPickleConnection, self).__init__(default_connections,
                                             keyed_connections, fields)
    self.protocol = protocol
    self.file = open(filename, 'wb')
    # the header goes out first so a reader sees the field names
    # before any data rows
    pickle.dump(fields, self.file, self.protocol)
def __init__(self, default_connections, keyed_connections, fields, filename,
             protocol):
    """Initialise via the base class, then open the output pickle file
    and dump the field names as its first record."""
    super(ToPickleConnection, self).__init__(
        default_connections, keyed_connections, fields
    )
    out = open(filename, 'wb')
    self.file = out
    self.protocol = protocol
    # emit the header record immediately
    pickle.dump(fields, out, protocol)
def _writepickle(table, source, mode, protocol, write_header):
    """Serialise *table* to *source* as a stream of pickled rows.

    The first row of *table* is the header; it is written only when
    *write_header* is true.
    """
    source = write_source_from_arg(source, mode)
    with source.open(mode) as f:

        def _dump(obj):
            # every record uses the same file handle and protocol
            pickle.dump(obj, f, protocol)

        rows = iter(table)
        hdr = next(rows)
        if write_header:
            _dump(hdr)
        for row in rows:
            _dump(row)
def _writepickle(table, source, mode, protocol, write_header):
    """Write *table* to *source* as a sequence of pickled rows.

    The first row is treated as the header and is emitted only when
    *write_header* is true.
    """
    source = write_source_from_arg(source)
    with source.open(mode) as f:
        stream = iter(table)
        hdr = next(stream)  # header row comes first
        if write_header:
            pickle.dump(hdr, f, protocol)
        # remaining rows are data
        for row in stream:
            pickle.dump(row, f, protocol)
def test_frompickle():
    """frompickle() should read back rows pickled one at a time, and the
    resulting table must be iterable more than once."""
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2),
              ('c', 2))
    f = NamedTemporaryFile(delete=False)
    for row in expect:
        pickle.dump(row, f)
    f.close()
    actual = frompickle(f.name)
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def __iter__(self):
    """Iterate the underlying table, pickling every row to the target
    source while yielding each row through to the caller."""
    sink = write_source_from_arg(self.source)
    with sink.open('wb') as f:
        stream = iter(self.table)
        hdr = next(stream)
        if self.write_header:
            pickle.dump(hdr, f, self.protocol)
        yield tuple(hdr)
        for row in stream:
            pickle.dump(row, f, self.protocol)
            yield tuple(row)
def __iter__(self):
    """Write rows to the pickle sink as they are consumed, passing each
    row through so this object behaves as a tee over the source table."""
    protocol = self.protocol  # hoist the attribute lookup
    source = write_source_from_arg(self.source)
    with source.open('wb') as f:
        rows = iter(self.table)
        header = next(rows)
        if self.write_header:
            pickle.dump(header, f, protocol)
        yield tuple(header)
        for current in rows:
            pickle.dump(current, f, protocol)
            yield tuple(current)
def test_frompickle():
    """Round-trip: rows dumped with pickle should be readable via
    frompickle(), and the table must support repeated iteration."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    tmp = NamedTemporaryFile(delete=False)
    for row in table:
        pickle.dump(row, tmp)
    tmp.close()
    actual = frompickle(tmp.name)
    for _ in range(2):  # second pass verifies re-iterability
        ieq(table, actual)
def accept(self, row):
    """Buffer *row*; when the buffer is full, sort it and spill it to a
    temporary pickle file, then start a fresh buffer with *row*."""
    row = tuple(row)
    if len(self.cache) >= self.buffersize:
        # buffer full: sort and dump the chunk
        self.cache.sort(key=self.getkey, reverse=self.reverse)
        f = NamedTemporaryFile()  # TODO need not be named
        for cached in self.cache:
            pickle.dump(cached, f, protocol=-1)
        f.flush()
        f.seek(0)  # rewind so the chunk can be read back later
        self.chunkfiles.append(f)
        self.cache = [row]
    else:
        self.cache.append(row)
def accept(self, row):
    """Accumulate *row* in the in-memory buffer, spilling a sorted chunk
    to disk whenever the buffer reaches capacity."""
    row = tuple(row)
    if len(self.cache) < self.buffersize:
        self.cache.append(row)
        return
    # capacity reached: sort the buffered rows and pickle them out
    self.cache.sort(key=self.getkey, reverse=self.reverse)
    chunk = NamedTemporaryFile()  # TODO need not be named
    for buffered in self.cache:
        pickle.dump(buffered, chunk, protocol=-1)
    chunk.flush()
    chunk.seek(0)  # leave the file positioned for reading
    self.chunkfiles.append(chunk)
    self.cache = [row]
def _iternocache(self, source, key, reverse):
    """Sort *source* by *key* and yield the sorted rows.

    Small tables (fitting within ``self.buffersize``) are sorted entirely
    in memory; larger tables are sorted in chunks that are spilled to
    temporary pickle files and then merged.  Results may be cached on
    ``self`` for reuse when ``self.cache`` is true.
    NOTE(review): structure reconstructed from a flattened source line —
    confirm branch boundaries against the original file.
    """
    debug('iterate without cache')
    self.clearcache()
    it = iter(source)
    hdr = next(it)
    yield tuple(hdr)
    if key is not None:
        # convert field selection into field indices
        indices = asindices(hdr, key)
    else:
        # no key given: sort on all fields, left to right
        indices = range(len(hdr))
    # now use field indices to construct a _getkey function
    # TODO check if this raises an exception on short rows
    getkey = comparable_itemgetter(*indices)
    # TODO support native comparison
    # initialise the first chunk
    rows = list(itertools.islice(it, 0, self.buffersize))
    rows.sort(key=getkey, reverse=reverse)
    # have we exhausted the source iterator?
    if self.buffersize is None or len(rows) < self.buffersize:
        # yes, table fits within sort buffer
        if self.cache:
            debug('caching mem')
            self._hdrcache = hdr
            self._memcache = rows
            # actually not needed to iterate from memcache
            self._getkey = getkey
        for row in rows:
            yield tuple(row)
    else:
        # no, table is too big, need to sort in chunks
        chunkfiles = []
        while rows:
            # dump the chunk
            with NamedTemporaryFile(dir=self.tempdir, delete=False,
                                    mode='wb') as f:
                # N.B., we **don't** want the file to be deleted on close,
                # but we **do** want the file to be deleted when self
                # is garbage collected, or when the program exits. When
                # all references to the wrapper are gone, the file should
                # get deleted.
                wrapper = _NamedTempFileDeleteOnGC(f.name)
                debug('created temporary chunk file %s' % f.name)
                for row in rows:
                    pickle.dump(row, f, protocol=-1)
                f.flush()
            chunkfiles.append(wrapper)
            # grab the next chunk
            rows = list(itertools.islice(it, 0, self.buffersize))
            rows.sort(key=getkey, reverse=reverse)
        if self.cache:
            debug('caching files')
            self._hdrcache = hdr
            self._filecache = chunkfiles
            self._getkey = getkey
        # lazily merge the sorted chunk files back into one stream
        chunkiters = [_iterchunk(f.name) for f in chunkfiles]
        for row in _mergesorted(getkey, reverse, *chunkiters):
            yield tuple(row)
def accept(self, row):
    """Pickle *row* to the open output file, then pass it along
    downstream unchanged (tee behaviour)."""
    out = self.file
    pickle.dump(row, out, self.protocol)
    # forward rows on the default pipe (behave like tee)
    self.broadcast(row)
def accept(self, row):
    """Persist *row* via pickle, then broadcast it so downstream
    consumers still receive every row (tee semantics)."""
    dump = pickle.dump
    dump(row, self.file, self.protocol)
    # rows continue down the default pipe
    self.broadcast(row)