def codec(self, version=1, corrupt=False, ignore_corrupt=False, **kwargs):
    """Round-trip ``self.data`` through an in-memory Disco stream.

    The data is encoded with ``self.encode`` into a ``DiscoOutputStream``
    backed by a ``BytesIO``, optionally damaged, and then handed to
    ``self.decode``.  Unless ``ignore_corrupt`` is set, a one-line summary
    of sizes and throughput is printed.

    Args:
        version: Forwarded to ``DiscoOutputStream`` as ``version``.
        corrupt: When true, the byte at offset 100 of the encoded output
            is replaced with ``b'X'`` before decoding.
        ignore_corrupt: Forwarded to ``self.decode``; when true the
            summary line is not printed.
        **kwargs: Extra keyword arguments forwarded to
            ``DiscoOutputStream`` and echoed in the summary.

    Returns:
        The second element of the pair returned by ``self.decode``.
    """
    sink = BytesIO()
    out_stream = DiscoOutputStream(sink, version=version, **kwargs)
    # NOTE(review): the value returned by self.encode is reported as
    # seconds and self.size as megabytes -- confirm against their definitions.
    encode_time = self.encode(out_stream, self.data)

    encoded = sink.getvalue()
    final_size = len(encoded)
    summary = (
        "{0:1.2f}MB encoded in {1:1.3f}s ({2:1.2f}MB/s), "
        "encoded size {3:1.3f}MB (version: {4}, {5})"
    ).format(
        self.size,
        encode_time,
        self.size / encode_time,
        final_size / 1024**2,
        version,
        kwargs,
    )

    if corrupt:
        # Same payload, except the byte at offset 100 becomes 'X'.  If the
        # payload is 100 bytes or shorter the 'X' simply lands at the end.
        sink = BytesIO(encoded[:100] + b'X' + encoded[101:])

    sink.seek(0)
    decode_time, result = self.decode(
        sink, final_size, "nourl", ignore_corrupt=ignore_corrupt)

    if not ignore_corrupt:
        print("{0}, decoded in {1:1.3f}s ({2:1.2f}MB/s)".format(
            summary, decode_time, self.size / decode_time))
    return result
def codec(self, version=1, corrupt=False, ignore_corrupt=False, **kwargs):
    """Encode ``self.data`` into memory, optionally corrupt it, and decode it.

    The data is written with ``self.encode`` through a
    ``DiscoOutputStream`` backed by an in-memory buffer, then read back
    with ``self.decode``.  Unless ``ignore_corrupt`` is set, a summary of
    sizes and throughput is printed.

    Args:
        version: Forwarded to ``DiscoOutputStream`` as ``version``.
        corrupt: When true, the byte at offset 100 of the encoded output
            is replaced with ``'X'`` before decoding.
        ignore_corrupt: Forwarded to ``self.decode``; when true the
            summary line is not printed.
        **kwargs: Extra keyword arguments forwarded to
            ``DiscoOutputStream`` and echoed in the summary.

    Returns:
        The second element of the pair returned by ``self.decode``.
    """
    # NOTE(review): cStringIO exists only on Python 2; moving this routine
    # to Python 3 additionally needs io.BytesIO and a bytes literal below.
    buf = cStringIO.StringIO()
    stream = DiscoOutputStream(buf, version=version, **kwargs)
    # NOTE(review): t is reported as seconds and self.size as megabytes --
    # confirm against self.encode / self.size.
    t = self.encode(stream, self.data)
    final_size = len(buf.getvalue())
    final_mb = final_size / 1024**2
    msg = ("%1.2fMB encoded in %1.3fs (%1.2fMB/s), "
           "encoded size %1.3fMB (version: %d, %s)"
           % (self.size, t, self.size / t, final_mb, version, kwargs))

    if corrupt:
        # Copy the payload, swapping the byte at offset 100 for 'X'.
        buf.seek(0)
        new = cStringIO.StringIO()
        new.write(buf.read(100))
        new.write('X')
        buf.read(1)  # skip the byte that 'X' replaces
        new.write(buf.read())
        buf = new

    buf.seek(0)
    t, res = self.decode(buf, final_size, "nourl",
                         ignore_corrupt=ignore_corrupt)
    if not ignore_corrupt:
        # Single-argument parenthesised form: prints the same text on
        # Python 2 and is also valid where print is a function.
        print("%s, decoded in %1.3fs (%1.2fMB/s)" % (msg, t, self.size / t))
    return res
def disco_output_stream(stream, partition, url, params):
    """Output stream for Disco's internal compression format.

    Wraps ``stream`` in a ``DiscoOutputStream`` and returns the wrapper.
    ``partition``, ``url`` and ``params`` are accepted but not used here.
    """
    # Resolved at call time rather than at module import.
    from disco.fileutils import DiscoOutputStream

    wrapped = DiscoOutputStream(stream)
    return wrapped