class DiscoZipFile(ZipFile, object):
    def __init__(self):
        self.buffer = BytesIO()
        super(DiscoZipFile, self).__init__(self.buffer, 'w', ZIP_DEFLATED)

    def writepath(self, pathname, exclude=()):
        for file in files(pathname):
            name, ext = os.path.splitext(file)
            if ext not in exclude:
                self.write(file, file)

    def writemodule(self, module, arcname=None):
        if isinstance(module, basestring):
            module = __import__(module)
        self.write(getsourcefile(module), arcname=arcname)

    def writesource(self, object):
        self.writepath(getsourcefile(getmodule(object)))

    def dump(self, handle):
        handle.write(self.dumps())

    def dumps(self):
        self.buffer.seek(0)
        return self.buffer.read()
def setUp(self):
    self.ddfs.push('disco:test:blobs', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.push('disco:test:blobs', [(BytesIO(b'datablob2'), 'blobdata2')])
    self.ddfs.push('disco:test:emptyblob', [(BytesIO(b''), 'empty')])
    self.ddfs.tag('disco:test:tag', [['urls']])
    self.ddfs.tag('disco:test:metatag',
                  [['tag://disco:test:tag'], ['tag://disco:test:metatag']])
def read(self, size=-1):
    buf = BytesIO()
    while size:
        bytes = self._read_chunk(size if size > 0 else CHUNK_SIZE)
        if not bytes:
            break
        size -= len(bytes)
        buf.write(bytes)
    return buf.getvalue()
def test_push(self):
    self.ddfs.push('disco:test:blobs', [(BytesIO(b'blobdata'), 'blobdata')])
    self.assert_(self.ddfs.exists('disco:test:blobs'))
    self.ddfs.push('tag://disco:test:blobs2', [(BytesIO(b'blobdata'), 'blobdata')])
    self.assert_(self.ddfs.exists('disco:test:blobs2'))
    self.ddfs.delete('disco:test:blobs')
    self.assert_(not self.ddfs.exists('disco:test:blobs'))
    self.ddfs.delete('disco:test:blobs2')
    self.assert_(not self.ddfs.exists('disco:test:blobs2'))
def setUp(self):
    self.ddfs.push('disco:test:authrd', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.push('disco:test:authwr', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.push('disco:test:authempty', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.setattr('disco:test:authrd', 'a', 'v')
    self.ddfs.setattr('disco:test:authwr', 'a', 'v')
    self.ddfs.setattr('disco:test:authrd', 'ddfs:read-token', 'rdr')
    self.ddfs.setattr('disco:test:authwr', 'ddfs:write-token', 'wtr')
    self.ddfs.setattr('disco:test:authempty', 'a', 'v')
    self.ddfs.setattr('disco:test:authempty', 'ddfs:read-token', '')
    self.ddfs.setattr('disco:test:authempty', 'ddfs:write-token', '')
class DiscoOutputStream_v1(object):
    def __init__(self, stream,
                 version=1,
                 compression_level=2,
                 min_hunk_size=HUNK_SIZE,
                 max_record_size=None):
        self.stream = stream
        self.version = version
        self.compression_level = compression_level
        self.max_record_size = max_record_size
        self.min_hunk_size = min_hunk_size
        self.size = 0
        self.hunk_size = 0
        self.hunk = BytesIO()

    def add(self, k, v):
        self.append((k, v))

    def append(self, record):
        self.hunk_write(pickle_dumps(record, 1))
        if self.hunk_size > self.min_hunk_size:
            self.flush()

    def close(self):
        if self.hunk_size:
            self.flush()
        self.flush()

    def flush(self):
        hunk = self.hunk.getvalue()
        checksum = crc32(hunk) & 0xFFFFFFFF
        iscompressed = int(self.compression_level > 0)
        if iscompressed:
            hunk = compress(hunk, self.compression_level)
        data = b''.join([struct.pack('<BBIQ',
                                     128 + self.version,
                                     iscompressed,
                                     checksum,
                                     len(hunk)),
                         hunk])
        self.stream.write(data)
        self.size += len(data)
        self.hunk_size = 0
        self.hunk = BytesIO()

    def hunk_write(self, data):
        size = len(data)
        if self.max_record_size and size > self.max_record_size:
            raise ValueError("Record of size " + str(size) +
                             " is larger than max_record_size: " +
                             str(self.max_record_size))
        self.hunk.write(data)
        self.hunk_size += size
def __init__(self, stream,
             version=1,
             compression_level=2,
             min_hunk_size=HUNK_SIZE,
             max_record_size=None):
    self.stream = stream
    self.version = version
    self.compression_level = compression_level
    self.max_record_size = max_record_size
    self.min_hunk_size = min_hunk_size
    self.size = 0
    self.hunk_size = 0
    self.hunk = BytesIO()
def flush(self):
    hunk = self.hunk.getvalue()
    checksum = crc32(hunk) & 0xFFFFFFFF
    iscompressed = int(self.compression_level > 0)
    if iscompressed:
        hunk = compress(hunk, self.compression_level)
    data = b''.join([
        struct.pack('<BBIQ', 128 + self.version, iscompressed, checksum, len(hunk)),
        hunk
    ])
    self.stream.write(data)
    self.size += len(data)
    self.hunk_size = 0
    self.hunk = BytesIO()
def jobpack(self, jobname):
    """Return the :class:`disco.job.JobPack` submitted for the job."""
    from disco.compat import BytesIO
    from disco.job import JobPack
    return JobPack.load(
        BytesIO(
            self.request('/disco/ctrl/parameters?name={0}'.format(jobname),
                         as_bytes=True)))
def test_create_delete_create_token(self):
    self.ddfs.delete('disco:test:delete2')
    self.assert_(not self.ddfs.exists('disco:test:delete2'))
    self.ddfs.push('disco:test:delete2', [(BytesIO(b'abc'), 'atom')], token='secret1')
    self.assert_(self.ddfs.exists('disco:test:delete2'))
    self.assert_("disco:test:delete2" in self.ddfs.list('disco:test:delete2'))
def test_create_delete_create(self):
    self.ddfs.delete('disco:test:delete1')
    self.assert_(not self.ddfs.exists('disco:test:delete1'))
    self.ddfs.push('disco:test:delete1', [(BytesIO(b'datablob'), 'blobdata')])
    self.assert_(self.ddfs.exists('disco:test:delete1'))
    self.assert_("disco:test:delete1" in self.ddfs.list('disco:test:delete1'))
def __iter__(self):
    chunk = self._read_chunk(CHUNK_SIZE)
    while chunk:
        next_chunk = self._read_chunk(CHUNK_SIZE)
        lines = list(BytesIO(chunk))
        last = lines.pop() if next_chunk else b''
        for line in lines:
            yield line
        chunk = last + next_chunk
def flush(self):
    hunk = self.hunk.getvalue()
    checksum = crc32(hunk) & 0xFFFFFFFF
    iscompressed = int(self.compression_level > 0)
    if iscompressed:
        hunk = compress(hunk, self.compression_level)
    data = b''.join([struct.pack('<BBIQ',
                                 128 + self.version,
                                 iscompressed,
                                 checksum,
                                 len(hunk)),
                     hunk])
    self.stream.write(data)
    self.size += len(data)
    self.hunk_size = 0
    self.hunk = BytesIO()
def disco_input_stream(stream, size, url, ignore_corrupt=False):
    """Input stream for Disco's internal compression format."""
    from disco.compat import BytesIO, int_of_byte
    from disco.compat import pickle_load
    import struct, gzip, zlib
    offset = 0
    while True:
        header = stream.read(1)
        if not header:
            return
        if int_of_byte(header[0]) < 128:
            for e in old_netstr_reader(stream, size, url, header):
                yield e
            return
        try:
            is_compressed, checksum, hunk_size =\
                struct.unpack('<BIQ', stream.read(13))
        except:
            raise DataError("Truncated data at {0} bytes".format(offset), url)
        if not hunk_size:
            return
        hunk = stream.read(hunk_size)
        data = b''
        try:
            data = zlib.decompress(hunk) if is_compressed else hunk
            if checksum != (zlib.crc32(data) & 0xFFFFFFFF):
                raise ValueError("Checksum does not match")
        except (ValueError, zlib.error) as e:
            if not ignore_corrupt:
                raise DataError(
                    "Corrupted data between bytes {0}-{1}: {2}".format(
                        offset, offset + hunk_size, e), url)
        offset += hunk_size
        hunk = BytesIO(data)
        while True:
            try:
                yield pickle_load(hunk)
            except EOFError:
                break
            except UnpicklingError as e:
                if not ignore_corrupt:
                    raise DataError(
                        "Corrupted data between bytes {0}-{1}: {2}".format(
                            offset - hunk_size, offset, e), url)
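A minimal round-trip sketch of the hunk format handled above, assuming DiscoOutputStream_v1 and disco_input_stream are both in scope as defined in this section; the record values and the 'nourl' placeholder are illustrative only.

from disco.compat import BytesIO

buf = BytesIO()
out = DiscoOutputStream_v1(buf)     # defaults: version=1, zlib compression level 2
out.add('key', 'value')             # add(k, v) pickles (k, v) into the current hunk
out.append(('another', 'record'))
out.close()                         # flushes the pending hunk, then a final empty hunk

data = buf.getvalue()
records = list(disco_input_stream(BytesIO(data), len(data), 'nourl'))
# records == [('key', 'value'), ('another', 'record')]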
def chunk(self, tag, urls,
          replicas=None,
          forceon=[],
          retries=10,
          delayed=False,
          update=False,
          token=None,
          chunk_size=CHUNK_SIZE,
          max_record_size=MAX_RECORD_SIZE,
          **kwargs):
    """
    Chunks the contents of `urls`, pushes the chunks to ddfs and tags
    them with `tag`.
    """
    from disco.core import result_iterator
    if 'reader' not in kwargs:
        kwargs['reader'] = None

    def chunk_iter(replicas):
        chunker = Chunker(chunk_size=chunk_size, max_record_size=max_record_size)
        return chunker.chunks(result_iterator([replicas], **kwargs))

    def chunk_name(replicas, n):
        url = listify(replicas)[0]
        return self.safe_name('{0}-{1}'.format(os.path.basename(url), n))

    blobs = [self._push((BytesIO(chunk), chunk_name(reps, n)),
                        replicas=replicas,
                        forceon=forceon,
                        retries=retries)
             for reps in urls
             for n, chunk in enumerate(chunk_iter(reps))]
    return (self.tag(tag, blobs, delayed=delayed, update=update, token=token),
            blobs)
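A hedged usage sketch of chunk() from the client side; the tag name and source URL below are made up, and a reachable DDFS master is assumed.

from disco.ddfs import DDFS

ddfs = DDFS()                                      # client for the default master
result, blobs = ddfs.chunk('disco:test:chunked',   # tag to attach the chunks to
                           ['http://example.com/input.txt'])  # hypothetical input URL
# blobs lists the pushed chunk blobs; result is whatever tagging them returned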
def test_atomic_token(self):
    self.ddfs.push('disco:test:atomic1', [(BytesIO(b'abc'), 'atom')],
                   update=True, delayed=True, token='secret1')
    getter = lambda: self.ddfs.getattr('disco:test:atomic1', 'foobar')
    self.assertCommErrorCode(401, getter)
    self.assertEquals(self.ddfs.getattr('disco:test:atomic1', 'ddfs:write-token',
                                        token='secret1'), 'secret1')
    self.ddfs.put('disco:test:atomic2', [], token='secret2')
    getter = lambda: self.ddfs.getattr('disco:test:atomic2', 'foobar')
    self.assertCommErrorCode(401, getter)
    self.assertEquals(self.ddfs.getattr('disco:test:atomic2', 'ddfs:write-token',
                                        token='secret2'), 'secret2')
    self.ddfs.put('disco:test:notoken', [])
    self.assertEquals(self.ddfs.getattr('disco:test:notoken', 'ddfs:write-token'),
                      None)
def read(self):
    # Note: both branches return the bound read() callable of the
    # underlying source, not the bytes themselves.
    if self.isopen:
        return BytesIO(str_to_bytes(self.source)).read
    return open(self.source, 'rb').read
def __init__(self):
    self.buffer = BytesIO()
    super(DiscoZipFile, self).__init__(self.buffer, 'w', ZIP_DEFLATED)
def codec(self, version=1, corrupt=False, ignore_corrupt=False, **kwargs):
    buf = BytesIO()
    stream = DiscoOutputStream(buf, version=version, **kwargs)
    t = self.encode(stream, self.data)
    final_size = len(buf.getvalue())
    final_mb = final_size / 1024**2
    msg = (("{0:1.2f}MB encoded in {1:1.3f}s ({2:1.2f}MB/s), "
            "encoded size {3:1.3f}MB (version: {4}, {5})")
           .format(self.size, t, self.size / t, final_mb, version, kwargs))
    if corrupt:
        buf.seek(0)
        new = BytesIO()
        new.write(buf.read(100))
        new.write(b'X')
        buf.read(1)
        new.write(buf.read())
        buf = new
    buf.seek(0)
    t, res = self.decode(buf, final_size, "nourl", ignore_corrupt=ignore_corrupt)
    if not ignore_corrupt:
        print("{0}, decoded in {1:1.3f}s ({2:1.2f}MB/s)"
              .format(msg, t, self.size / t))
    return res
def setUp(self):
    self.ddfs.push('disco:test:attrs', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.setattr('disco:test:attrs', 'a1', 'v1')
    self.ddfs.setattr('disco:test:attrs', 'a2', 'v2')
def makeout(self):
    return DiscoOutputStream(BytesIO(), max_record_size=MAX_RECORD_SIZE)
def setUp(self):
    self.ddfs.push('disco:test:delete1', [(BytesIO(b'datablob'), 'blobdata')])
    self.ddfs.push('disco:test:delete2', [(BytesIO(b'datablob'), 'blobdata')])
def save_oob(host, name, key, value, ddfs_token=None):
    from disco.ddfs import DDFS
    DDFS(host).push(DDFS.job_oob(name), [(BytesIO(value), key)], delayed=True)
class DDFSUpdateTestCase(TestCase):
    data = BytesIO(b'blobdata')

    def setUp(self):
        self.ddfs.delete('disco:test:blobs')

    def blobnames(self, tag):
        from disco.ddfs import DDFS
        return list(reversed(list(DDFS.blob_name(repl[0])
                                  for repl in self.ddfs.blobs(tag))))

    def test_update_empty_new(self):
        self.ddfs.push('disco:test:blobs', [], update=True)
        self.assertEquals(len(self.blobnames('disco:test:blobs')), 0)
        self.ddfs.delete('disco:test:blobs')

    def test_update(self):
        for i in range(5):
            self.ddfs.push('disco:test:blobs', [(self.data, 'dup')] * 2, update=True)
        self.assertEquals(len(self.blobnames('disco:test:blobs')), 1)
        for i in range(5):
            self.ddfs.push('disco:test:blobs', [(self.data, 'dup2')],
                           update=True, delayed=True)
        self.assertEquals(len(self.blobnames('disco:test:blobs')), 2)
        self.ddfs.delete('disco:test:blobs')

    def test_no_garbage(self):
        self.ddfs.push('disco:test:blobs', [(self.data, 'dup')] * 2, update=True)
        tag_pre = self.ddfs.get('disco:test:blobs')
        self.assertEquals(len(tag_pre['urls']), 1)
        self.ddfs.tag('disco:test:blobs', tag_pre['urls'], update=True)
        self.assertEquals(tag_pre['id'], self.ddfs.get('disco:test:blobs')['id'])
        self.ddfs.delete('disco:test:blobs')

    def test_random(self):
        import random
        keys = [str(random.randint(1, 100)) for i in range(100)]
        ukeys = []
        for key in keys:
            self.ddfs.push('disco:test:blobs', [(self.data, key)], update=True)
            if key not in ukeys:
                ukeys.append(key)
        self.assertEquals(ukeys, self.blobnames('disco:test:blobs'))
        self.ddfs.delete('disco:test:blobs')

    def test_mixed(self):
        keys = []
        for key in map(str, range(10)):
            self.ddfs.push('disco:test:blobs', [(self.data, key)] * 2)
            keys += [key] * 2
        for key in map(str, range(15)):
            self.ddfs.push('disco:test:blobs', [(self.data, key)] * 2, update=True)
            if int(key) > 9:
                keys.append(key)
        for key in map(str, range(10)):
            self.ddfs.push('disco:test:blobs', [(self.data, key)] * 2, delayed=True)
            keys += [key] * 2
        self.assertEquals(keys, self.blobnames('disco:test:blobs'))
        self.ddfs.delete('disco:test:blobs')

    def tearDown(self):
        self.ddfs.delete('disco:test:blobs')
def dumps(obj, protocol=None):
    file = BytesIO()
    Pickler(file, protocol).dump(obj)
    return file.getvalue()
def setUp(self):
    super(AuthTestCase, self).setUp()
    from disco.compat import BytesIO
    self.tag = 'disco:test:authjob'
    self.ddfs.push(self.tag, [(BytesIO(b'blobdata'), 'blob')])
def makeout(self):
    return DiscoOutputStream(BytesIO(), max_record_size=self.max_record_size)