Example No. 1
    async def _upload_part(self, stream: streams.BaseStream, part_id: str,
                           part_size: int, start_offset: int,
                           session_id: str) -> dict:
        """Upload one part/chunk of the given stream to Box.

        Box requires that the sha of the part be sent along in the headers of the request.  To do
        this WB must write the stream segment to disk before uploading.  The part sha is calculated
        as the tempfile is written.

        API Docs: https://developer.box.com/reference#upload-part
        """

        cutoff_stream = streams.CutoffStream(stream, cutoff=part_size)
        part_hasher_name = 'part-{}-sha1'.format(part_id)
        stream.add_writer(part_hasher_name,
                          streams.HashStreamWriter(hashlib.sha1))

        f = tempfile.TemporaryFile()
        chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        while chunk:
            f.write(chunk)
            chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        file_stream = streams.FileStreamReader(f)

        part_sha = stream.writers[part_hasher_name].digest
        part_sha_b64 = base64.standard_b64encode(part_sha).decode()
        stream.remove_writer(part_hasher_name)

        byte_range = self._build_range_header(
            (start_offset, start_offset + part_size - 1))
        content_range = str(byte_range).replace('=', ' ') + '/{}'.format(
            stream.size)

        async with self.request(
                'PUT',
                self._build_upload_url('files', 'upload_sessions', session_id),
                headers={
                    # ``Content-Length`` is required for ``asyncio`` to use inner chunked stream read
                    'Content-Length': str(part_size),
                    'Content-Range': content_range,
                    'Content-Type': 'application/octet-stream',
                    'Digest': 'sha={}'.format(part_sha_b64)
                },
                data=file_stream,
                expects=(201, 200),
                throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        f.close()
        return data['part']
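For reference, the two range headers built above are easy to derive standalone. This is a minimal sketch, not WaterButler's actual ``_build_range_header`` helper; the 16 MiB total and 8 MiB part size are assumed example values.

    def part_range_headers(start_offset: int, part_size: int, total_size: int) -> dict:
        # ``Range``-style value, e.g. ``bytes=8388608-16777215``
        byte_range = 'bytes={}-{}'.format(start_offset, start_offset + part_size - 1)
        return {
            'Content-Length': str(part_size),
            # ``Content-Range`` swaps the ``=`` for a space and appends ``/<total>``
            'Content-Range': byte_range.replace('=', ' ') + '/{}'.format(total_size),
        }

    print(part_range_headers(8 * 1024 * 1024, 8 * 1024 * 1024, 16 * 1024 * 1024))
    # {'Content-Length': '8388608', 'Content-Range': 'bytes 8388608-16777215/16777216'}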
Example No. 2
    async def _chunked_upload(self, path: WaterButlerPath,
                              stream: streams.BaseStream) -> dict:
        """Upload a large file to Box over multiple requests. This method will be used if the
        file to upload is larger than ``NONCHUNKED_UPLOAD_LIMIT``.  Checksum verification is built
        into this process, so manual verification is not needed.

        API Docs: https://developer.box.com/reference#chunked-upload
        """

        # Step 1: Add a sha1 calculator. The final sha1 will be needed to complete the session
        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        # Step 2: Create an upload session with Box and receive the session id.
        session_data = await self._create_chunked_upload_session(path, stream)
        logger.debug('chunked upload session data: {}'.format(
            json.dumps(session_data)))

        metadata = None

        try:
            # Step 3. Split the data into parts and upload them to box.
            parts_manifest = await self._upload_parts(stream, session_data)
            logger.debug('chunked upload parts manifest: {}'.format(
                json.dumps(parts_manifest)))

            data_sha = base64.standard_b64encode(
                stream.writers['sha1'].digest).decode()

            # Step 4. Complete the session and return the uploaded file's metadata.
            retry = self.UPLOAD_COMMIT_RETRIES
            while retry > 0:
                retry -= 1  # note: ``--retry`` is a double unary minus, a no-op in Python
                try:
                    metadata = await self._complete_chunked_upload_session(
                        session_data, parts_manifest, data_sha)
                    break
                except RetryChunkedUploadCommit:
                    continue

        except Exception as err:
            msg = 'An unexpected error has occurred during the multi-part upload.'
            logger.error('{} upload_id={} error={!r}'.format(
                msg, session_data, err))
            # ``data_sha`` is unbound if the failure happened before Step 3 finished;
            # recompute it from the hash writer so the abort request can still be made.
            data_sha = base64.standard_b64encode(
                stream.writers['sha1'].digest).decode()
            aborted = await self._abort_chunked_upload(session_data, data_sha)
            if not aborted:
                msg += '  The abort action failed to clean up the temporary file parts generated ' \
                    'during the upload process.  Please manually remove them.'
            raise exceptions.UploadError(msg)

        return metadata
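The bounded-retry pattern in Step 4 is worth isolating, since the original ``--retry`` never decremented the counter and would retry forever. A minimal self-contained sketch, where ``commit`` and ``RetryableError`` are stand-ins for ``_complete_chunked_upload_session`` and ``RetryChunkedUploadCommit``:

    class RetryableError(Exception):
        pass

    async def commit_with_retries(commit, retries: int = 3):
        """Retry ``commit`` up to ``retries`` times; return None if all attempts fail."""
        result = None
        while retries > 0:
            retries -= 1  # must be an explicit decrement; ``--retries`` parses but does nothing
            try:
                result = await commit()
                break
            except RetryableError:
                continue
        return result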
Example No. 3
    async def _upload_part(self, stream: streams.BaseStream, part_id: str, part_size: int,
                           start_offset: int, session_id: str) -> dict:
        """Upload one part/chunk of the given stream to Box.

        Box requires that the sha of the part be sent along in the headers of the request.  To do
        this WB must write the stream segment to disk before uploading.  The part sha is calculated
        as the tempfile is written.

        API Docs: https://developer.box.com/reference#upload-part
        """

        cutoff_stream = streams.CutoffStream(stream, cutoff=part_size)
        part_hasher_name = 'part-{}-sha1'.format(part_id)
        stream.add_writer(part_hasher_name, streams.HashStreamWriter(hashlib.sha1))

        f = tempfile.TemporaryFile()
        chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        while chunk:
            f.write(chunk)
            chunk = await cutoff_stream.read(self.TEMP_CHUNK_SIZE)
        file_stream = streams.FileStreamReader(f)

        part_sha = stream.writers[part_hasher_name].digest
        part_sha_b64 = base64.standard_b64encode(part_sha).decode()
        stream.remove_writer(part_hasher_name)

        byte_range = self._build_range_header((start_offset, start_offset + part_size - 1))
        content_range = str(byte_range).replace('=', ' ') + '/{}'.format(stream.size)

        async with self.request(
            'PUT',
            self._build_upload_url('files', 'upload_sessions', session_id),
            headers={
                # ``Content-Length`` is required for ``asyncio`` to use inner chunked stream read
                'Content-Length': str(part_size),
                'Content-Range': content_range,
                'Content-Type': 'application/octet-stream',
                'Digest': 'sha={}'.format(part_sha_b64)
            },
            data=file_stream,
            expects=(201, 200),
            throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        f.close()
        return data['part']
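Note that the ``Digest`` header wants the base64 encoding of the raw SHA-1 digest, not the hex digest. A quick standard-library illustration (the payload is an assumed example):

    import base64
    import hashlib

    part_bytes = b'example part payload'
    raw_digest = hashlib.sha1(part_bytes).digest()  # 20 raw bytes, not a hex string
    print('Digest: sha={}'.format(base64.standard_b64encode(raw_digest).decode()))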
Example No. 4
    async def upload(
            self,  # type: ignore
            stream: streams.BaseStream,
            path: WaterButlerPath,
            conflict: str = 'replace',
            **kwargs) -> Tuple[BoxFileMetadata, bool]:
        if path.identifier and conflict == 'keep':
            path, _ = await self.handle_name_conflict(path,
                                                      conflict=conflict,
                                                      kind='folder')
            path._parts[-1]._id = None

        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        data_stream = streams.FormDataStream(
            attributes=json.dumps({
                'name': path.name,
                'parent': {
                    'id': path.parent.identifier
                }
            }))
        data_stream.add_file('file',
                             stream,
                             path.name,
                             disposition='form-data')

        async with self.request(
                'POST',
                self._build_upload_url(
                    *filter(lambda x: x is not None, ('files', path.identifier,
                                                      'content'))),
                data=data_stream,
                headers=data_stream.headers,
                expects=(201, ),
                throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        entry = data['entries'][0]
        if stream.writers['sha1'].hexdigest != entry['sha1']:
            raise exceptions.UploadChecksumMismatchError()

        created = path.identifier is None
        path._parts[-1]._id = entry['id']
        return BoxFileMetadata(entry, path), created
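The ``FormDataStream`` built above mirrors Box's multipart upload body: a JSON ``attributes`` field followed by the file field. A hypothetical aiohttp equivalent, for illustration only (not WaterButler's actual stream class):

    import json
    import aiohttp

    def build_box_upload_body(name: str, parent_id: str, fileobj) -> aiohttp.FormData:
        form = aiohttp.FormData()
        # Box expects the JSON attributes part *before* the file part
        form.add_field('attributes', json.dumps({'name': name, 'parent': {'id': parent_id}}))
        form.add_field('file', fileobj, filename=name)
        return form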
Example No. 5
    async def _chunked_upload(self, path: WaterButlerPath, stream: streams.BaseStream) -> dict:
        """Upload a large file to Box over multiple requests. This method will be used if the
        file to upload is larger than ``NONCHUNKED_UPLOAD_LIMIT``.  Checksum verification is built
        into this process, so manual verification is not needed.

        API Docs: https://developer.box.com/reference#chunked-upload
        """

        # Step 1: Add a sha1 calculator. The final sha1 will be needed to complete the session
        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        # Step 2: Create an upload session with Box and receive the session id.
        session_data = await self._create_chunked_upload_session(path, stream)
        logger.debug('chunked upload session data: {}'.format(json.dumps(session_data)))

        metadata = None

        try:
            # Step 3. Split the data into parts and upload them to box.
            parts_manifest = await self._upload_parts(stream, session_data)
            logger.debug('chunked upload parts manifest: {}'.format(json.dumps(parts_manifest)))

            data_sha = base64.standard_b64encode(stream.writers['sha1'].digest).decode()

            # Step 4. Complete the session and return the uploaded file's metadata.
            retry = self.UPLOAD_COMMIT_RETRIES
            while retry > 0:
                retry -= 1  # note: ``--retry`` is a double unary minus, a no-op in Python
                try:
                    metadata = await self._complete_chunked_upload_session(session_data,
                                                                           parts_manifest, data_sha)
                    break
                except RetryChunkedUploadCommit:
                    continue

        except Exception as err:
            msg = 'An unexpected error has occurred during the multi-part upload.'
            logger.error('{} upload_id={} error={!r}'.format(msg, session_data, err))
            # ``data_sha`` is unbound if the failure happened before Step 3 finished;
            # recompute it from the hash writer so the abort request can still be made.
            data_sha = base64.standard_b64encode(stream.writers['sha1'].digest).decode()
            aborted = await self._abort_chunked_upload(session_data, data_sha)
            if not aborted:
                msg += '  The abort action failed to clean up the temporary file parts generated ' \
                    'during the upload process.  Please manually remove them.'
            raise exceptions.UploadError(msg)

        return metadata
Example No. 6
    async def _contiguous_upload(self, path: WaterButlerPath,
                                 stream: streams.BaseStream) -> dict:
        """Upload a file to Box using a single request. This will only be called if the file is
        smaller than the ``NONCHUNKED_UPLOAD_LIMIT``.

        API Docs: https://developer.box.com/reference#upload-a-file
        """
        assert stream.size <= self.NONCHUNKED_UPLOAD_LIMIT
        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        data_stream = streams.FormDataStream(
            attributes=json.dumps({
                'name': path.name,
                'parent': {
                    'id': path.parent.identifier
                }
            }))
        data_stream.add_file('file',
                             stream,
                             path.name,
                             disposition='form-data')

        if path.identifier is not None:
            segments = ['files', path.identifier, 'content']
        else:
            segments = ['files', 'content']

        response = await self.make_request(
            'POST',
            self._build_upload_url(*segments),
            data=data_stream,
            headers=data_stream.headers,
            expects=(201, ),
            throws=exceptions.UploadError,
        )
        data = await response.json()

        entry = data['entries'][0]
        if stream.writers['sha1'].hexdigest != entry['sha1']:
            raise exceptions.UploadChecksumMismatchError()

        return entry
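The size check implied by the docstring and the assert lives in the caller. A minimal sketch of that dispatch, with an assumed example value for ``NONCHUNKED_UPLOAD_LIMIT`` (the real limit comes from the provider's settings):

    NONCHUNKED_UPLOAD_LIMIT = 50 * 1024 * 1024  # illustrative value only

    async def upload_dispatch(provider, path, stream):
        # Small files take the single-request path; larger ones are chunked.
        if stream.size <= NONCHUNKED_UPLOAD_LIMIT:
            return await provider._contiguous_upload(path, stream)
        return await provider._chunked_upload(path, stream)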
Example No. 7
    async def _contiguous_upload(self, path: WaterButlerPath, stream: streams.BaseStream) -> dict:
        """Upload a file to Box using a single request. This will only be called if the file is
        smaller than the ``NONCHUNKED_UPLOAD_LIMIT``.

        API Docs: https://developer.box.com/reference#upload-a-file
        """
        assert stream.size <= self.NONCHUNKED_UPLOAD_LIMIT
        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        data_stream = streams.FormDataStream(
            attributes=json.dumps({
                'name': path.name,
                'parent': {'id': path.parent.identifier}
            })
        )
        data_stream.add_file('file', stream, path.name, disposition='form-data')

        if path.identifier is not None:
            segments = ['files', path.identifier, 'content']
        else:
            segments = ['files', 'content']

        async with self.request(
            'POST',
            self._build_upload_url(*segments),
            data=data_stream,
            headers=data_stream.headers,
            expects=(201, ),
            throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        entry = data['entries'][0]
        if stream.writers['sha1'].hexdigest != entry['sha1']:
            raise exceptions.UploadChecksumMismatchError()

        return entry
Example No. 8
    async def upload(self, stream: BaseStream, path: WaterButlerPath, *args,
                     **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
        """Upload a file stream to the given WaterButlerPath.

        API docs:

            PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object

            Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

        The response has an empty body. It does not have the required header ``Last-Modified``.
        In addition, the ``Content-Type`` header is for the response itself, not for the file WB
        just uploaded. WB must make an extra metadata request after a successful upload.

        The "etag" header returned by the XML API is exactly the same as the hex-digest of the
        MD5 hash. WB uses this header to verify the upload checksum instead of parsing the hash
        headers.

        Similarly to Amazon S3, WB must set ``skip_auto_headers={'Content-Type'}`` when calling
        :meth:`.BaseProvider.make_request()` because ``Content-Type`` is part of the "String To
        Sign".  The signed request would fail and return ``HTTP 403 Forbidden`` with the error
        message ``SignatureDoesNotMatch`` if auto headers were not skipped.

        :param stream: the stream to post
        :type stream: :class:`.streams.BaseStream`
        :param path: the WaterButlerPath of the file to upload
        :type path: :class:`.WaterButlerPath`
        :param list args: additional args are ignored
        :param dict kwargs: additional kwargs are ignored
        :rtype: :class:`tuple` of (:class:`.GoogleCloudFileMetadata`, :class:`bool`)
        """

        created = not await self.exists(path)

        stream.add_writer('md5', HashStreamWriter(hashlib.md5))

        req_method = 'PUT'
        obj_name = utils.get_obj_name(path, is_folder=False)
        signed_url = functools.partial(self._build_and_sign_url, req_method, obj_name)
        headers = {'Content-Length': str(stream.size)}

        resp = await self.make_request(
            req_method,
            signed_url,
            data=stream,
            skip_auto_headers={'Content-Type'},
            headers=headers,
            expects=(HTTPStatus.OK,),
            throws=UploadError
        )

        await resp.release()

        header_etag = resp.headers.get('etag', None)
        if not header_etag:
            raise UploadError('Missing response header "ETag" for upload.')

        if header_etag.strip('"') != stream.writers['md5'].hexdigest:
            raise UploadChecksumMismatchError()

        metadata = await self._metadata_object(path, is_folder=False)
        return metadata, created  # type: ignore
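The etag check described in the docstring is easy to demonstrate: for a non-composite object the XML API's ``ETag`` is the quoted hex MD5 of the object bytes.

    import hashlib

    def etag_matches(header_etag: str, local_md5_hex: str) -> bool:
        # GCS quotes the etag value, so strip the quotes before comparing
        return header_etag.strip('"') == local_md5_hex

    body = b'hello world'
    local_hex = hashlib.md5(body).hexdigest()
    assert etag_matches('"{}"'.format(local_hex), local_hex)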
Example No. 9
    async def upload(self, stream: BaseStream, path: WaterButlerPath, *args,
                     **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
        """Upload a file stream to the given WaterButlerPath.

        API docs:

            PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object

            Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

        The response has an empty body. It does not have the required header ``Last-Modified``.
        In addition, the ``Content-Type`` header is for the response itself, not for the file WB
        just uploaded. WB must make an extra metadata request after a successful upload.

        The "etag" header returned by the XML API is exactly the same as the hex-digest of the
        MD5 hash. WB uses this header to verify the upload checksum instead of parsing the hash
        headers.

        Similarly to Amazon S3, WB must set ``skip_auto_headers={'Content-Type'}`` when calling
        :meth:`.BaseProvider.make_request()` because ``Content-Type`` is part of the "String To
        Sign".  The signed request would fail and return ``HTTP 403 Forbidden`` with the error
        message ``SignatureDoesNotMatch`` if auto headers were not skipped.

        :param stream: the stream to post
        :type stream: :class:`.streams.BaseStream`
        :param path: the WaterButlerPath of the file to upload
        :type path: :class:`.WaterButlerPath`
        :param list args: additional args are ignored
        :param dict kwargs: additional kwargs are ignored
        :rtype: :class:`tuple` of (:class:`.GoogleCloudFileMetadata`, :class:`bool`)
        """

        created = not await self.exists(path)

        stream.add_writer('md5', HashStreamWriter(hashlib.md5))

        req_method = 'PUT'
        obj_name = utils.get_obj_name(path, is_folder=False)
        signed_url = functools.partial(self._build_and_sign_url, req_method, obj_name)
        headers = {'Content-Length': str(stream.size)}

        resp = await self.make_request(req_method,
                                       signed_url,
                                       data=stream,
                                       skip_auto_headers={'Content-Type'},
                                       headers=headers,
                                       expects=(HTTPStatus.OK, ),
                                       throws=UploadError)

        await resp.release()

        header_etag = resp.headers.get('etag', None)
        if not header_etag:
            raise UploadError('Missing response header "ETag" for upload.')

        if header_etag.strip('"') != stream.writers['md5'].hexdigest:
            raise UploadChecksumMismatchError()

        metadata = await self._metadata_object(path, is_folder=False)
        return metadata, created  # type: ignore
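Why ``skip_auto_headers={'Content-Type'}`` matters: in the legacy XML-API signing scheme, ``Content-Type`` occupies its own line in the "String To Sign", so a header the HTTP client adds after signing produces a different signature server-side. An illustrative, simplified sketch of that string (canonicalized extension headers omitted; not the provider's actual signing code):

    def string_to_sign(method: str, content_md5: str, content_type: str,
                       expires: str, resource: str) -> str:
        # Legacy (v2-style) signed-URL format shared by S3 and the GCS XML API
        return '\n'.join([method, content_md5, content_type, expires, resource])

    # Signing with an empty Content-Type but sending one auto-added by the
    # client yields HTTP 403 ``SignatureDoesNotMatch``.
    print(repr(string_to_sign('PUT', '', '', '1730000000', '/bucket/object')))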