async def test_zip_files(self, temp_files):
    files = []
    for filename in ['file1.ext', 'zip.zip', 'file2.ext']:
        path = temp_files.add_file(filename)
        contents = os.urandom(2**5)
        with open(path, 'wb') as f:
            f.write(contents)
        files.append({
            'filename': filename,
            'path': path,
            'contents': contents,
            'handle': open(path, 'rb')
        })

    stream = streams.ZipStreamReader(
        AsyncIterator(
            (file['filename'], streams.FileStreamReader(file['handle']))
            for file in files
        )
    )

    data = await stream.read()

    for file in files:
        file['handle'].close()

    zip = zipfile.ZipFile(io.BytesIO(data))

    # Verify CRCs: `.testzip()` returns `None` if there are no bad files in the zipfile
    assert zip.testzip() is None

    for file in files:
        assert zip.open(file['filename']).read() == file['contents']

        compression_type = zip.open(file['filename'])._compress_type
        if file['filename'].endswith('.zip'):
            assert compression_type == zipfile.ZIP_STORED
        else:
            assert compression_type != zipfile.ZIP_STORED
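# The `AsyncIterator` helper used throughout these tests is not defined in the snippets
# shown here. A minimal sketch of what it is assumed to be: a thin wrapper that exposes a
# plain iterable through the async-iteration protocol, so ZipStreamReader can pull
# (filename, stream) pairs via `__anext__`. The class name and shape are assumptions for
# illustration, not the project's actual implementation.
class AsyncIterator:
    """Wrap a synchronous iterable so it can be consumed with async iteration."""

    def __init__(self, iterable):
        self.iterator = iter(iterable)

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            return next(self.iterator)
        except StopIteration:
            # Signal the end of async iteration
            raise StopAsyncIteration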
def zip(self, path, **kwargs):
    """Streams a Zip archive of the given folder

    :param str path: The folder to compress
    """
    if path.is_file:
        base_path = path.parent.path
    else:
        base_path = path.path

    names, coros, remaining = [], [], [path]

    while remaining:
        path = remaining.pop()
        metadata = yield from self.metadata(path)
        for item in metadata:
            current_path = yield from self.revalidate_path(
                path, item.name, folder=item.is_folder
            )

            if current_path.is_file:
                names.append(current_path.path.replace(base_path, '', 1))
                coros.append(self.__zip_defered_download(current_path))
            else:
                remaining.append(current_path)

    return streams.ZipStreamReader(*zip(names, coros))
async def test_multiple_files(self):
    file1 = ('file1.txt', streams.StringStream('[File One]'))
    file2 = ('file2.txt', streams.StringStream('[File Two]'))
    file3 = ('file3.txt', streams.StringStream('[File Three]'))

    files = AsyncIterator([file1, file2, file3])

    stream = streams.ZipStreamReader(files)
    data = await stream.read()

    zip = zipfile.ZipFile(io.BytesIO(data))

    # Verify CRCs
    assert zip.testzip() is None

    # Check content of included files
    zipped1 = zip.open('file1.txt')
    assert zipped1.read() == b'[File One]'

    zipped2 = zip.open('file2.txt')
    assert zipped2.read() == b'[File Two]'

    zipped3 = zip.open('file3.txt')
    assert zipped3.read() == b'[File Three]'
def test_multiple_large_files(self, temp_files):
    files = []
    for index in range(5):
        filename = 'file{}.ext'.format(index)
        path = temp_files.add_file(filename)
        contents = os.urandom(2**18)
        with open(path, 'wb') as f:
            f.write(contents)
        files.append({
            'filename': filename,
            'path': path,
            'contents': contents
        })

    for file in files:
        file['handle'] = open(file['path'], 'rb')

    stream = streams.ZipStreamReader(
        *(
            (file['filename'], streams.FileStreamReader(file['handle']))
            for file in files
        )
    )

    data = yield from stream.read()

    for file in files:
        file['handle'].close()

    zip = zipfile.ZipFile(io.BytesIO(data))

    # Verify CRCs
    assert zip.testzip() is None

    for file in files:
        assert zip.open(file['filename']).read() == file['contents']
async def upload(self, stream, path, **kwargs):
    """Zips the given stream then uploads to Dataverse.
    This will delete existing draft files with the same name.

    :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
    :param str path: The filename prepended with '/'
    :rtype: dict, bool
    """
    stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

    zip_stream = streams.ZipStreamReader(AsyncIterator([(path.name, stream)]))

    # Write stream to disk (Necessary to find zip file size)
    f = tempfile.TemporaryFile()
    chunk = await zip_stream.read()
    while chunk:
        f.write(chunk)
        chunk = await zip_stream.read()
    file_stream = streams.FileStreamReader(f)

    dv_headers = {
        "Content-Disposition": "filename=temp.zip",
        "Content-Type": "application/zip",
        "Packaging": "http://purl.org/net/sword/package/SimpleZip",
        "Content-Length": str(file_stream.size),
    }

    # Delete old file if it exists
    if path.identifier:
        await self.delete(path)

    resp = await self.make_request(
        'POST',
        self.build_url(settings.EDIT_MEDIA_BASE_URL, 'study', self.doi),
        headers=dv_headers,
        auth=(self.token, ),
        data=file_stream,
        expects=(201, ),
        throws=exceptions.UploadError
    )
    await resp.release()

    # Find appropriate version of file
    metadata = await self._get_data('latest')
    files = metadata if isinstance(metadata, list) else []
    file_metadata = next(file for file in files if file.name == path.name)

    if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
        raise exceptions.UploadChecksumMismatchError()

    return file_metadata, path.identifier is None
async def zip(self, path, **kwargs):
    """Streams a Zip archive of the given folder

    :param str path: The folder to compress
    """
    metadata = await self.metadata(path)
    if path.is_file:
        metadata = [metadata]
        path = path.parent

    return streams.ZipStreamReader(ZipStreamGenerator(self, path, *metadata))
async def zip(self, path: wb_path.WaterButlerPath, **kwargs) -> asyncio.StreamReader:
    """Streams a Zip archive of the given folder

    :param path: ( :class:`.WaterButlerPath` ) The folder to compress
    """
    meta_data = await self.metadata(path)  # type: ignore
    if path.is_file:
        meta_data = [meta_data]  # type: ignore
        path = path.parent

    return streams.ZipStreamReader(ZipStreamGenerator(self, path, *meta_data))  # type: ignore
def test_single_file(self):
    file = ('filename.extension', streams.StringStream('[File Content]'))
    stream = streams.ZipStreamReader(file)

    data = yield from stream.read()

    zip = zipfile.ZipFile(io.BytesIO(data))

    # Verify CRCs
    assert zip.testzip() is None

    result = zip.open('filename.extension')

    # Check content of included file
    assert result.read() == b'[File Content]'
def upload(self, stream, path, **kwargs):
    """Zips the given stream then uploads to Dataverse.
    This will delete existing draft files with the same name.

    :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
    :param str path: The filename prepended with '/'
    :rtype: dict, bool
    """
    stream = streams.ZipStreamReader((path.name, stream))

    # Write stream to disk (Necessary to find zip file size)
    f = tempfile.TemporaryFile()
    chunk = yield from stream.read()
    while chunk:
        f.write(chunk)
        chunk = yield from stream.read()
    stream = streams.FileStreamReader(f)

    dv_headers = {
        "Content-Disposition": "filename=temp.zip",
        "Content-Type": "application/zip",
        "Packaging": "http://purl.org/net/sword/package/SimpleZip",
        "Content-Length": str(stream.size),
    }

    # Delete old file if it exists
    if path.identifier:
        yield from self.delete(path)

    yield from self.make_request(
        'POST',
        self.build_url(settings.EDIT_MEDIA_BASE_URL, 'study', self.doi),
        headers=dv_headers,
        auth=(self.token, ),
        data=stream,
        expects=(201, ),
        throws=exceptions.UploadError
    )

    # Find appropriate version of file
    metadata = yield from self._get_data('latest')
    files = metadata if isinstance(metadata, list) else []
    file_metadata = next(file for file in files if file['name'] == path.name)

    return file_metadata, path.identifier is None
def test_download_stream(self):
    data = b'freddie brian john roger'
    stream = streams.StringStream(data)
    stream.content_type = 'application/octet-stream'
    zipstream = streams.ZipStreamReader(('file.txt', stream))

    self.mock_provider.zip = utils.MockCoroutine(return_value=zipstream)

    resp = yield self.http_client.fetch(
        self.get_url('/zip?provider=queenhub&path=/freddie.png'),
    )

    zip = zipfile.ZipFile(io.BytesIO(resp.body))

    assert zip.testzip() is None

    assert zip.open('file.txt').read() == data
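# `utils.MockCoroutine` above is a test helper not defined in these snippets. A minimal
# sketch of the assumed idea: a MagicMock whose call returns an awaitable, so
# `await provider.zip(...)` resolves to the configured `return_value` while call
# arguments are still recorded for assertions. This is an illustration under that
# assumption, not necessarily the project's actual helper.
from unittest import mock


class MockCoroutine(mock.MagicMock):
    async def __call__(self, *args, **kwargs):
        # Record the call like a normal MagicMock, but behind an awaitable
        return super().__call__(*args, **kwargs)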
async def test_single_large_file(self, temp_files):
    filename = 'foo.txt'
    path = temp_files.add_file(filename)
    random_data = os.urandom(2**18)
    with open(path, 'wb') as f:
        f.write(random_data)

    with open(path, 'rb') as f:
        stream = streams.ZipStreamReader(
            AsyncIterator([(filename, streams.FileStreamReader(f))])
        )
        data = await stream.read()

    zip = zipfile.ZipFile(io.BytesIO(data))

    # Verify CRCs
    assert zip.testzip() is None

    result = zip.open('foo.txt')

    # Check content of included file
    assert result.read() == random_data