Example #1
    async def test_zip_files(self, temp_files):

        files = []
        for filename in ['file1.ext', 'zip.zip', 'file2.ext']:
            path = temp_files.add_file(filename)
            contents = os.urandom(2**5)
            with open(path, 'wb') as f:
                f.write(contents)
            files.append({
                'filename': filename,
                'path': path,
                'contents': contents,
                'handle': open(path, 'rb')
            })

        stream = streams.ZipStreamReader(
            AsyncIterator(
                (file['filename'], streams.FileStreamReader(file['handle']))
                for file in files))
        data = await stream.read()
        for file in files:
            file['handle'].close()
        zip = zipfile.ZipFile(io.BytesIO(data))

        # Verify CRCs: `.testzip()` returns `None` if there are no bad files in the zipfile
        assert zip.testzip() is None

        for file in files:
            assert zip.open(file['filename']).read() == file['contents']
            compression_type = zip.open(file['filename'])._compress_type
            if file['filename'].endswith('.zip'):
                assert compression_type == zipfile.ZIP_STORED
            else:
                assert compression_type != zipfile.ZIP_STORED
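Note: several of these examples (#1, #6, #14) wrap their filename/stream pairs in an AsyncIterator before handing them to streams.ZipStreamReader. That helper is not shown on this page; below is a minimal sketch of such a test utility, assuming it only needs to adapt a synchronous iterable to the async-iteration protocol.

class AsyncIterator:
    """Adapts a synchronous iterable so it can be consumed with ``async for``."""

    def __init__(self, iterable):
        self.iterator = iter(iterable)

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            return next(self.iterator)
        except StopIteration:
            raise StopAsyncIteration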
Example #2
    async def test_multiple_large_files(self, temp_files):
        files = []
        for index in range(5):
            filename = 'file{}.ext'.format(index)
            path = temp_files.add_file(filename)
            contents = os.urandom(2**18)

            with open(path, 'wb') as f:
                f.write(contents)

            files.append({
                'filename': filename,
                'path': path,
                'contents': contents
            })

        for file in files:
            file['handle'] = open(file['path'], 'rb')

        stream = streams.ZipStreamReader(
            *((file['filename'], streams.FileStreamReader(file['handle']))
              for file in files))

        data = await stream.read()

        for file in files:
            file['handle'].close()

        zip = zipfile.ZipFile(io.BytesIO(data))

        # Verify CRCs
        assert zip.testzip() is None

        for file in files:
            assert zip.open(file['filename']).read() == file['contents']
Example #3
async def _upload_parity(path, credentials, settings):
    _, name = os.path.split(path)
    provider_name = settings.get('provider')
    provider = make_provider(provider_name, {}, credentials, settings)
    with open(path, 'rb') as file_pointer:
        stream = streams.FileStreamReader(file_pointer)
        await provider.upload(stream,
                              (await provider.validate_path('/' + name)))
Example #4
    def download(self, path, revision=None, **kwargs):
        if not os.path.exists(path.full_path):
            raise exceptions.DownloadError(
                'Could not retrieve file \'{0}\''.format(path),
                code=404,
            )

        file_pointer = open(path.full_path, 'rb')
        return streams.FileStreamReader(file_pointer)
Example #5
async def _upload_parity(path, credentials, settings):
    _, name = os.path.split(path)
    provider_name = settings.get('provider')
    provider = make_provider(provider_name, {}, credentials, settings)
    with open(path, 'rb') as file_pointer:
        stream = streams.FileStreamReader(file_pointer)
        stream.add_writer('sha256', streams.HashStreamWriter(hashlib.sha256))
        await provider.upload(stream,
                              (await provider.validate_path('/' + name)))
    return (name, stream.writers['sha256'].hexdigest)
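Note: examples #5 through #7 register hash writers with stream.add_writer() and later read back .hexdigest (or .digest) as a property. A minimal sketch of what such a writer could look like, assuming each chunk is pushed through write() as it flows past the stream (the actual waterbutler base class and hook names may differ):

import hashlib

class HashStreamWriter:
    """Feeds every chunk it sees into a hashlib object."""

    def __init__(self, hasher=hashlib.sha256):
        self.hash = hasher()

    def write(self, data):
        self.hash.update(data)

    @property
    def hexdigest(self):
        return self.hash.hexdigest()

    @property
    def digest(self):
        return self.hash.digest()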
Example #6
    async def upload(self, stream, path, **kwargs):
        """Zips the given stream then uploads to Dataverse.
        This will delete existing draft files with the same name.

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
        :param str path: The filename prepended with '/'

        :rtype: dict, bool
        """

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        zip_stream = streams.ZipStreamReader(
            AsyncIterator([(path.name, stream)]))

        # Write stream to disk (Necessary to find zip file size)
        f = tempfile.TemporaryFile()
        chunk = await zip_stream.read()
        while chunk:
            f.write(chunk)
            chunk = await zip_stream.read()
        file_stream = streams.FileStreamReader(f)

        dv_headers = {
            "Content-Disposition": "filename=temp.zip",
            "Content-Type": "application/zip",
            "Packaging": "http://purl.org/net/sword/package/SimpleZip",
            "Content-Length": str(file_stream.size),
        }

        # Delete old file if it exists
        if path.identifier:
            await self.delete(path)

        resp = await self.make_request('POST',
                                       self.build_url(
                                           settings.EDIT_MEDIA_BASE_URL,
                                           'study', self.doi),
                                       headers=dv_headers,
                                       auth=(self.token, ),
                                       data=file_stream,
                                       expects=(201, ),
                                       throws=exceptions.UploadError)
        await resp.release()

        # Find appropriate version of file
        metadata = await self._get_data('latest')
        files = metadata if isinstance(metadata, list) else []
        file_metadata = next(file for file in files if file.name == path.name)

        if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return file_metadata, path.identifier is None
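Note: examples #6 and #12 both spool the zip stream to a temporary file purely to learn its final size before setting Content-Length. The same loop, factored into a standalone helper for illustration (not a waterbutler API):

import tempfile

async def spool_to_tempfile(stream):
    """Drain an async stream into a temporary file so its total size can be measured."""
    f = tempfile.TemporaryFile()
    chunk = await stream.read()
    while chunk:
        f.write(chunk)
        chunk = await stream.read()
    return f  # caller wraps this in FileStreamReader and closes it when done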
Example #7
    async def _upload_part(self, stream: streams.BaseStream, part_id: str,
                           part_size: int, start_offset: int,
                           session_id: str) -> dict:
        """Upload one part/chunk of the given stream to Box.

        Box requires that the sha of the part be sent along in the headers of the request.  To do
        this WB must write the stream segment to disk before uploading.  The part sha is calculated
        as the tempfile is written.

        API Docs: https://developer.box.com/reference#upload-part
        """

        cutoff_stream = streams.CutoffStream(stream, cutoff=part_size)
        part_hasher_name = 'part-{}-sha1'.format(part_id)
        stream.add_writer(part_hasher_name,
                          streams.HashStreamWriter(hashlib.sha1))

        f = tempfile.TemporaryFile()
        chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        while chunk:
            f.write(chunk)
            chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        file_stream = streams.FileStreamReader(f)

        part_sha = stream.writers[part_hasher_name].digest
        part_sha_b64 = base64.standard_b64encode(part_sha).decode()
        stream.remove_writer(part_hasher_name)

        byte_range = self._build_range_header(
            (start_offset, start_offset + part_size - 1))
        content_range = str(byte_range).replace('=', ' ') + '/{}'.format(
            stream.size)

        async with self.request(
                'PUT',
                self._build_upload_url('files', 'upload_sessions', session_id),
                headers={
                    # ``Content-Length`` is required for ``asyncio`` to use inner chunked stream read
                    'Content-Length': str(part_size),
                    'Content-Range': content_range,
                    'Content-Type': 'application/octet-stream',
                    'Digest': 'sha={}'.format(part_sha_b64)
                },
                data=file_stream,
                expects=(201, 200),
                throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        f.close()
        return data['part']
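Note: CutoffStream above caps how many bytes a single part consumes from the shared upload stream. A rough sketch of the idea, with illustrative names rather than waterbutler's exact implementation:

class CutoffStream:
    """Reads at most ``cutoff`` bytes from an underlying async stream."""

    def __init__(self, stream, cutoff):
        self.stream = stream
        self.remaining = cutoff

    async def read(self, n=-1):
        if self.remaining <= 0:
            return b''
        if n < 0 or n > self.remaining:
            n = self.remaining
        chunk = await self.stream.read(n)
        self.remaining -= len(chunk)
        return chunk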
Example #8
    async def test_upload_nested_update(self, provider):
        file_name = 'nested.txt'
        file_folder = '/subfolder'
        file_path = os.path.join(file_folder, file_name)
        file_content = b'Test Update Nested Content'
        file_stream = streams.FileStreamReader(io.BytesIO(file_content))

        path = await provider.validate_path(file_path)
        metadata, created = await provider.upload(file_stream, path)

        assert metadata.name == file_name
        assert metadata.path == file_path
        assert metadata.size == len(file_content)
        assert created is False
Example #9
    async def test_upload_update(self, provider):
        file_name = 'flower.jpg'
        file_folder = '/'
        file_path = os.path.join(file_folder, file_name)
        file_content = b'Short and stout'
        file_stream = streams.FileStreamReader(io.BytesIO(file_content))

        path = await provider.validate_path(file_path)
        metadata, created = await provider.upload(file_stream, path)

        assert metadata.name == file_name
        assert metadata.path == file_path
        assert metadata.size == len(file_content)
        assert created is False
Example #10
    async def test_upload_nested_create(self, provider):
        file_name = 'new.txt'
        file_folder = '/newsubfolder'
        file_path = os.path.join(file_folder, file_name)
        file_content = b'Test New Nested Content'
        file_stream = streams.FileStreamReader(io.BytesIO(file_content))

        path = await provider.validate_path(file_path)
        metadata, created = await provider.upload(file_stream, path)

        assert metadata.name == file_name
        assert metadata.path == file_path
        assert metadata.size == len(file_content)
        assert created is True
Example #11
    async def test_file_stream_reader_after_seek(self):
        with open(DUMMY_FILE, 'r') as fp:
            fp.seek(3)
            reader = streams.FileStreamReader(fp)
            assert reader.size == 27  # still gives full size

            assert fp.tell() == 3  # returns to original seek position
            data = await reader.read()
            assert data == 'abcdefghijklmnopqrstuvwxyz\n'  # always reads full data
            at_eof = reader.at_eof()
            assert not at_eof

            data = await reader.read()
            assert data == b''
            at_eof = reader.at_eof()
            assert at_eof
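Note: the assertions above (full size despite the prior seek(3), and a full read from the start) imply that FileStreamReader measures size by seeking to the end and restoring the cursor, then rewinds to offset 0 before reading. A plausible sketch of that behavior, assuming a plain synchronous file object underneath (the class name here is illustrative):

import os

class FileStreamReaderSketch:
    def __init__(self, file_pointer):
        self.file_pointer = file_pointer

    @property
    def size(self):
        cursor = self.file_pointer.tell()       # remember the caller's position
        self.file_pointer.seek(0, os.SEEK_END)  # jump to the end to measure
        length = self.file_pointer.tell()
        self.file_pointer.seek(cursor)          # restore the original position
        return length

    def read_all(self):
        self.file_pointer.seek(0)               # always reads from the start
        return self.file_pointer.read()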
Example #12
    async def upload(self, stream, path, **kwargs):
        """Zips the given stream then uploads to Dataverse.
        This will delete existing draft files with the same name.

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
        :param str path: The filename prepended with '/'

        :rtype: dict, bool
        """

        stream = streams.ZipStreamReader((path.name, stream))

        # Write stream to disk (Necessary to find zip file size)
        f = tempfile.TemporaryFile()
        chunk = await stream.read()
        while chunk:
            f.write(chunk)
            chunk = await stream.read()
        stream = streams.FileStreamReader(f)

        dv_headers = {
            "Content-Disposition": "filename=temp.zip",
            "Content-Type": "application/zip",
            "Packaging": "http://purl.org/net/sword/package/SimpleZip",
            "Content-Length": str(stream.size),
        }

        # Delete old file if it exists
        if path.identifier:
            await self.delete(path)

        await self.make_request(
            'POST',
            self.build_url(settings.EDIT_MEDIA_BASE_URL, 'study', self.doi),
            headers=dv_headers,
            auth=(self.token, ),
            data=stream,
            expects=(201, ),
            throws=exceptions.UploadError
        )

        # Find appropriate version of file
        metadata = await self._get_data('latest')
        files = metadata if isinstance(metadata, list) else []
        file_metadata = next(file for file in files if file['name'] == path.name)

        return file_metadata, path.identifier is None
Example #13
    async def test_file_stream_reader(self):
        with open(DUMMY_FILE, 'r') as fp:
            reader = streams.FileStreamReader(fp)
            assert reader.size == 27

            data = await reader.read()
            assert data == 'abcdefghijklmnopqrstuvwxyz\n'
            at_eof = reader.at_eof()
            assert not at_eof

            data = await reader.read()
            assert data == b''
            at_eof = reader.at_eof()
            assert at_eof

            reader.close()
            at_eof = reader.at_eof()
            assert at_eof
            with pytest.raises(ValueError):
                fp.read()
Example #14
    async def test_single_large_file(self, temp_files):
        filename = 'foo.txt'
        path = temp_files.add_file(filename)
        random_data = os.urandom(2**18)
        with open(path, 'wb') as f:
            f.write(random_data)

        with open(path, 'rb') as f:

            stream = streams.ZipStreamReader(
                AsyncIterator([(filename, streams.FileStreamReader(f))]))

            data = await stream.read()

        zip = zipfile.ZipFile(io.BytesIO(data))

        # Verify CRCs
        assert zip.testzip() is None

        result = zip.open('foo.txt')

        # Check content of included file
        assert result.read() == random_data
Example #15
    async def test_file_stream_reader_subset(self):
        with open(DUMMY_FILE, 'r') as fp:
            reader = streams.FileStreamReader(fp)

            data = await reader.read(10)
            assert data == 'abcdefghij'
            at_eof = reader.at_eof()
            assert not at_eof

            data = await reader.read(2)
            assert data == 'kl'
            at_eof = reader.at_eof()
            assert not at_eof

            data = await reader.read()
            assert data == 'mnopqrstuvwxyz\n'
            at_eof = reader.at_eof()
            assert not at_eof

            data = await reader.read()
            assert data == b''
            at_eof = reader.at_eof()
            assert at_eof
Example #16
def file_stream(file_like):
    # Wrap any file-like object (disk file, io.BytesIO, tempfile) in a FileStreamReader
    return streams.FileStreamReader(file_like)
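A short usage sketch of this factory with an in-memory buffer, assuming it can be called directly and that size behaves as in the reader tests above:

import io

stream = file_stream(io.BytesIO(b'hello world'))
assert stream.size == 11  # size is measured by seeking the underlying buffer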