async def _intra_copy_file(
        self,
        dest_provider: BaseProvider,
        source_path: WaterButlerPath,
        dest_path: WaterButlerPath,
) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
    """Copy a file within the same Google Cloud Storage provider, overwriting the
    destination object if one already exists.

    API docs: https://cloud.google.com/storage/docs/xml-api/put-object-copy

    The XML copy response carries ``CopyObjectResult``/``ETag``/``LastModified`` but its
    ``Content-Type`` and ``Last-Modified`` headers do not describe the new file, so a
    separate metadata request is made afterwards.

    *TODO [Phase 2]: if needed, build the metadata from response headers and XML body*

    :param dest_provider: the destination provider, must be the same as the source one
    :type dest_provider: :class:`.BaseProvider`
    :param source_path: the source WaterButlerPath for the object to copy from
    :type source_path: :class:`.WaterButlerPath`
    :param dest_path: the destination WaterButlerPath for the object to copy to
    :type dest_path: :class:`.WaterButlerPath`
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: bool
    """
    # Probe the destination first so the caller learns whether this copy
    # created a new object or replaced an existing one.
    created = not await dest_provider.exists(dest_path)

    verb = 'PUT'
    source_obj = utils.get_obj_name(source_path, is_folder=False)
    # The canonical extension header names the copy source and is part of the
    # "String To Sign", so it is passed both to the signer and to the request.
    copy_source_headers = {'x-goog-copy-source': '{}/{}'.format(self.bucket, source_obj)}
    request_headers = {'Content-Length': '0', 'Content-Type': ''}
    request_headers.update(copy_source_headers)

    target_obj = utils.get_obj_name(dest_path, is_folder=False)
    signed_url = functools.partial(
        self._build_and_sign_url,
        verb,
        target_obj,
        canonical_ext_headers=copy_source_headers,
    )
    resp = await self.make_request(
        verb,
        signed_url,
        headers=request_headers,
        expects=(HTTPStatus.OK,),
        throws=CopyError,
    )
    await resp.release()

    metadata = await self._metadata_object(dest_path, is_folder=False)
    return metadata, created  # type: ignore
async def _intra_copy_file(self, dest_provider: BaseProvider, source_path: WaterButlerPath,
                           dest_path: WaterButlerPath,
                           ) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
    """Copy a file inside one Google Cloud Storage provider; an existing destination
    object is overwritten.

    API docs: https://cloud.google.com/storage/docs/xml-api/put-object-copy

    .. note::

        The copy response's ``Content-Type`` and ``Last-Modified`` headers do not
        describe the new file, hence the follow-up metadata request.

        *TODO [Phase 2]: if needed, build the metadata from response headers and XML body*

    :param dest_provider: the destination provider, must be the same as the source one
    :type dest_provider: :class:`.BaseProvider`
    :param source_path: the source WaterButlerPath for the object to copy from
    :type source_path: :class:`.WaterButlerPath`
    :param dest_path: the destination WaterButlerPath for the object to copy to
    :type dest_path: :class:`.WaterButlerPath`
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: bool
    """
    # Record whether the destination exists before the copy so we can report
    # "created" vs. "overwritten" to the caller.
    created = not await dest_provider.exists(dest_path)

    http_method = 'PUT'
    src_name = utils.get_obj_name(source_path, is_folder=False)
    ext_headers = {'x-goog-copy-source': '{}/{}'.format(self.bucket, src_name)}
    headers = dict({'Content-Length': '0', 'Content-Type': ''}, **ext_headers)

    dest_name = utils.get_obj_name(dest_path, is_folder=False)
    # Delay URL building/signing until request time via functools.partial.
    signed_url = functools.partial(self._build_and_sign_url, http_method, dest_name,
                                   canonical_ext_headers=ext_headers)
    resp = await self.make_request(http_method, signed_url, headers=headers,
                                   expects=(HTTPStatus.OK,), throws=CopyError)
    await resp.release()

    metadata = await self._metadata_object(dest_path, is_folder=False)
    return metadata, created  # type: ignore
async def test_upload_file_checksum_mismatch(self, mock_time, mock_provider, file_wb_path,
                                             meta_file_raw, meta_file_upload_raw,
                                             file_stream_file):
    """An upload whose response ``etag`` disagrees with the stream's MD5 raises
    ``UploadChecksumMismatchError`` with a 500 code.
    """
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)

    upload_url = mock_provider._build_and_sign_url('PUT', obj_name)
    # There is no need to use `MultiDict` since the hashes are not used
    upload_headers = dict(json.loads(meta_file_upload_raw))
    # Deliberately wrong etag to trigger the checksum check.
    upload_headers['etag'] = '"9e780e1c4ee28c44642160b349b3aab0"'
    aiohttpretty.register_uri('PUT', upload_url,
                              headers=utils.get_multi_dict_from_python_dict(upload_headers),
                              status=HTTPStatus.OK)

    metadata_url = mock_provider._build_and_sign_url('HEAD', obj_name)
    # There is no need to use `MultiDict` since the hashes are not used
    metadata_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', metadata_url, headers=metadata_headers,
                              status=HTTPStatus.OK)

    with pytest.raises(exceptions.UploadChecksumMismatchError) as exc:
        await mock_provider.upload(file_stream_file, file_wb_path)

    assert exc.value.code == HTTPStatus.INTERNAL_SERVER_ERROR
    assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
    assert aiohttpretty.has_call(method='PUT', uri=upload_url)
async def _delete_file(self, path: WaterButlerPath) -> None:
    """Delete the file at the given WaterButlerPath.

    API docs: https://cloud.google.com/storage/docs/xml-api/delete-object

    Deleting a non-existent object yields ``HTTP 404 Not Found`` with a
    ``NoSuchKey`` error body from the service.

    :param path: the WaterButlerPath of the file to delete
    :type path: :class:`.WaterButlerPath`
    :rtype: None
    """
    verb = 'DELETE'
    obj_name = utils.get_obj_name(path, is_folder=False)
    # Signing is deferred so each (re)try of the request gets a fresh URL.
    signed_url = functools.partial(self._build_and_sign_url, verb, obj_name)

    resp = await self.make_request(
        verb,
        signed_url,
        expects=(HTTPStatus.NO_CONTENT,),
        throws=DeleteError,
    )
    await resp.release()
async def test_upload_file_checksum_mismatch(self, mock_time, mock_provider, file_wb_path,
                                             meta_file_raw, meta_file_upload_raw,
                                             file_stream_file):
    """A mismatched response ``etag`` must abort the upload with a 500-coded
    ``UploadChecksumMismatchError``.
    """
    name = utils.get_obj_name(file_wb_path, is_folder=False)

    put_url = mock_provider._build_and_sign_url('PUT', name)
    # There is no need to use `MultiDict` since the hashes are not used
    put_headers_dict = dict(json.loads(meta_file_upload_raw))
    put_headers_dict.update({'etag': '"9e780e1c4ee28c44642160b349b3aab0"'})
    aiohttpretty.register_uri('PUT', put_url,
                              headers=utils.get_multi_dict_from_python_dict(put_headers_dict),
                              status=HTTPStatus.OK)

    head_url = mock_provider._build_and_sign_url('HEAD', name)
    # There is no need to use `MultiDict` since the hashes are not used
    head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

    with pytest.raises(exceptions.UploadChecksumMismatchError) as exc:
        await mock_provider.upload(file_stream_file, file_wb_path)

    assert exc.value.code == HTTPStatus.INTERNAL_SERVER_ERROR
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
    assert aiohttpretty.has_call(method='PUT', uri=put_url)
async def test_upload_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                           meta_file_parsed, meta_file_upload_raw, file_stream_file):
    """A successful upload makes a signed PUT, then a HEAD for metadata, and
    returns metadata matching the fixture.
    """
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)

    put_url = mock_provider._build_and_sign_url('PUT', obj_name)
    put_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_upload_raw)))
    aiohttpretty.register_uri('PUT', put_url, headers=put_headers, status=HTTPStatus.OK)

    head_url = mock_provider._build_and_sign_url('HEAD', obj_name)
    head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

    expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))

    metadata, _ = await mock_provider.upload(file_stream_file, file_wb_path)

    assert metadata == expected
    assert aiohttpretty.has_call(method='PUT', uri=put_url)
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
async def test_upload_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                           meta_file_parsed, meta_file_upload_raw, file_stream_file):
    """Uploading a stream should PUT the object, HEAD its metadata, and return
    the parsed metadata fixture.
    """
    name = utils.get_obj_name(file_wb_path, is_folder=False)

    upload_url = mock_provider._build_and_sign_url('PUT', name)
    aiohttpretty.register_uri(
        'PUT',
        upload_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_upload_raw))),
        status=HTTPStatus.OK,
    )

    metadata_url = mock_provider._build_and_sign_url('HEAD', name)
    aiohttpretty.register_uri(
        'HEAD',
        metadata_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw))),
        status=HTTPStatus.OK,
    )

    metadata_expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))
    metadata, _ = await mock_provider.upload(file_stream_file, file_wb_path)

    assert metadata == metadata_expected
    assert aiohttpretty.has_call(method='PUT', uri=upload_url)
    assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
async def _delete_file(self, path: WaterButlerPath) -> None:
    """Issue a signed ``DELETE`` for the file at ``path``.

    API docs: https://cloud.google.com/storage/docs/xml-api/delete-object

    A ``DELETE`` for a missing object returns ``HTTP 404 Not Found`` with a
    ``NoSuchKey`` error message from the service.

    :param path: the WaterButlerPath of the file to delete
    :type path: :class:`.WaterButlerPath`
    :rtype: None
    """
    object_name = utils.get_obj_name(path, is_folder=False)
    # Defer signing to request time; 204 No Content is the only success status.
    url_builder = functools.partial(self._build_and_sign_url, 'DELETE', object_name)
    resp = await self.make_request('DELETE', url_builder,
                                   expects=(HTTPStatus.NO_CONTENT,),
                                   throws=DeleteError)
    await resp.release()
def test_path_and_obj_name_for_file(self, file_obj_name, file_wb_path):
    """Converting a file WaterButlerPath to an object name and back round-trips."""
    assert utils.get_obj_name(file_wb_path) == file_obj_name
    assert utils.build_path(file_obj_name) == '/' + file_wb_path.path
def test_path_and_obj_name_for_folder(self, folder_obj_name, folder_wb_path):
    """Converting a folder WaterButlerPath to an object name and back round-trips."""
    assert utils.get_obj_name(folder_wb_path, is_folder=True) == folder_obj_name
    assert utils.build_path(folder_obj_name, is_folder=True) == '/' + folder_wb_path.path
async def download(self, path: WaterButlerPath, accept_url=False, range=None,  # type: ignore
                   **kwargs) -> typing.Union[str, ResponseStreamReader]:
    """Download the object at ``path``.

    API Docs:

        GET Object: https://cloud.google.com/storage/docs/xml-api/get-object
        Download an Object: https://cloud.google.com/storage/docs/xml-api/get-object-download

    With ``accept_url=False`` this makes a standard signed request and returns a
    ``ResponseStreamReader``.  With ``accept_url=True`` it instead returns a signed
    ``GET`` URL carrying a ``response-content-disposition`` query parameter so the
    browser downloads with the display name.

    :param path: the WaterButlerPath for the object to download
    :type path: :class:`.WaterButlerPath`
    :param bool accept_url: should return a direct time-limited download url from the provider
    :param tuple range: the Range HTTP request header
    :param dict kwargs: ``displayName`` - the display name of the file on OSF and for download
    :rtype: str or :class:`.streams.ResponseStreamReader`
    """
    if path.is_folder:
        raise DownloadError('Cannot download folders', code=HTTPStatus.BAD_REQUEST)

    obj_name = utils.get_obj_name(path, is_folder=False)

    if accept_url:
        display_name = kwargs.get('displayName', path.name)
        # NOTE(review): the filename is interpolated unquoted/unescaped; names
        # containing spaces, quotes or non-ASCII may produce a malformed
        # Content-Disposition — consider RFC 6266-style quoting. TODO confirm.
        query = {
            'response-content-disposition': 'attachment; filename={}'.format(display_name)
        }
        # There is no need to delay URL building and signing
        return self._build_and_sign_url('GET', obj_name, **query)  # type: ignore

    signed_url = functools.partial(self._build_and_sign_url, 'GET', obj_name)
    resp = await self.make_request(
        'GET',
        signed_url,
        range=range,
        expects=(HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT),
        throws=DownloadError,
    )
    return ResponseStreamReader(resp)
async def test_intra_copy_file(self, mock_time, mock_provider, file_wb_path, file_2_wb_path,
                               meta_file_raw, meta_file_parsed, meta_file_copy_raw):
    """An intra-provider copy PUTs with ``x-goog-copy-source``, then HEADs the
    destination and returns its metadata.
    """
    src_path, dest_path = file_2_wb_path, file_wb_path
    src_name = utils.get_obj_name(src_path, is_folder=False)
    dest_name = utils.get_obj_name(dest_path, is_folder=False)
    ext_headers = {'x-goog-copy-source': '{}/{}'.format(mock_provider.bucket, src_name)}

    copy_url = mock_provider._build_and_sign_url('PUT', dest_name,
                                                 canonical_ext_headers=ext_headers)
    copy_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw)))
    aiohttpretty.register_uri('PUT', copy_url, headers=copy_headers, status=HTTPStatus.OK)

    head_url = mock_provider._build_and_sign_url('HEAD', dest_name)
    head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

    expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))

    metadata, _ = await mock_provider.intra_copy(mock_provider, src_path, dest_path)

    assert metadata == expected
    assert aiohttpretty.has_call(method='PUT', uri=copy_url)
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
async def test_intra_copy_file_not_found(self, mock_time, mock_provider, file_wb_path,
                                         file_2_wb_path, meta_file_raw, meta_file_copy_raw):
    """A 404 on the copy PUT surfaces as ``CopyError`` carrying NOT_FOUND."""
    src_path, dest_path = file_2_wb_path, file_wb_path
    src_name = utils.get_obj_name(src_path, is_folder=False)
    dest_name = utils.get_obj_name(dest_path, is_folder=False)
    ext_headers = {'x-goog-copy-source': '{}/{}'.format(mock_provider.bucket, src_name)}

    copy_url = mock_provider._build_and_sign_url('PUT', dest_name,
                                                 canonical_ext_headers=ext_headers)
    copy_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw)))
    # The copy itself fails with 404.
    aiohttpretty.register_uri('PUT', copy_url, headers=copy_headers,
                              status=HTTPStatus.NOT_FOUND)

    head_url = mock_provider._build_and_sign_url('HEAD', dest_name)
    head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

    with pytest.raises(exceptions.CopyError) as exc:
        await mock_provider.intra_copy(mock_provider, src_path, dest_path)

    assert exc.value.code == HTTPStatus.NOT_FOUND
    assert aiohttpretty.has_call(method='PUT', uri=copy_url)
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path):
    """With ``accept_url=True`` download returns a signed URL (no request made)
    whose disposition query matches the file name.
    """
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    disposition = ('attachment; filename="text-file-1.txt"; '
                   'filename*=UTF-8\'\'text-file-1.txt')
    expected_url = mock_provider._build_and_sign_url(
        'GET', obj_name, **{'response-content-disposition': disposition})

    returned = await mock_provider.download(file_wb_path, accept_url=True)

    assert not aiohttpretty.has_call(method='GET', uri=expected_url)
    assert isinstance(returned, str)
    assert expected_url == returned
async def test_intra_copy_file(self, mock_time, mock_provider, file_wb_path, file_2_wb_path,
                               meta_file_raw, meta_file_parsed, meta_file_copy_raw):
    """``intra_copy`` issues the signed copy PUT followed by a metadata HEAD and
    yields the destination file's metadata.
    """
    source = file_2_wb_path
    destination = file_wb_path
    source_name = utils.get_obj_name(source, is_folder=False)
    destination_name = utils.get_obj_name(destination, is_folder=False)
    copy_source = '{}/{}'.format(mock_provider.bucket, source_name)
    canonical_ext_headers = {'x-goog-copy-source': copy_source}

    intra_copy_url = mock_provider._build_and_sign_url(
        'PUT',
        destination_name,
        canonical_ext_headers=canonical_ext_headers,
    )
    aiohttpretty.register_uri(
        'PUT',
        intra_copy_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw))),
        status=HTTPStatus.OK,
    )

    metadata_url = mock_provider._build_and_sign_url('HEAD', destination_name)
    aiohttpretty.register_uri(
        'HEAD',
        metadata_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw))),
        status=HTTPStatus.OK,
    )

    metadata_expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))
    metadata, _ = await mock_provider.intra_copy(mock_provider, source, destination)

    assert metadata == metadata_expected
    assert aiohttpretty.has_call(method='PUT', uri=intra_copy_url)
    assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
async def test_delete_file(self, mock_time, mock_provider, file_wb_path):
    """Deleting a file issues exactly one signed DELETE request."""
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    delete_url = mock_provider._build_and_sign_url('DELETE', obj_name)
    aiohttpretty.register_uri('DELETE', delete_url, status=HTTPStatus.NO_CONTENT)

    await mock_provider.delete(file_wb_path)

    assert aiohttpretty.has_call(method='DELETE', uri=delete_url)
async def test_intra_copy_file_not_found(self, mock_time, mock_provider, file_wb_path,
                                         file_2_wb_path, meta_file_raw, meta_file_copy_raw):
    """``intra_copy`` propagates a 404 from the copy PUT as ``CopyError``."""
    source = file_2_wb_path
    destination = file_wb_path
    source_name = utils.get_obj_name(source, is_folder=False)
    destination_name = utils.get_obj_name(destination, is_folder=False)
    canonical_ext_headers = {
        'x-goog-copy-source': '{}/{}'.format(mock_provider.bucket, source_name)
    }

    intra_copy_url = mock_provider._build_and_sign_url(
        'PUT',
        destination_name,
        canonical_ext_headers=canonical_ext_headers,
    )
    aiohttpretty.register_uri(
        'PUT',
        intra_copy_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw))),
        status=HTTPStatus.NOT_FOUND,  # simulate a missing source object
    )

    metadata_url = mock_provider._build_and_sign_url('HEAD', destination_name)
    aiohttpretty.register_uri(
        'HEAD',
        metadata_url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw))),
        status=HTTPStatus.OK,
    )

    with pytest.raises(exceptions.CopyError) as exc:
        await mock_provider.intra_copy(mock_provider, source, destination)

    assert exc.value.code == HTTPStatus.NOT_FOUND
    assert aiohttpretty.has_call(method='PUT', uri=intra_copy_url)
    assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
async def test_delete_file(self, mock_time, mock_provider, file_wb_path):
    """A file delete hits the signed DELETE endpoint once and succeeds on 204."""
    name = utils.get_obj_name(file_wb_path, is_folder=False)
    url = mock_provider._build_and_sign_url('DELETE', name)
    aiohttpretty.register_uri(
        'DELETE',
        url,
        status=HTTPStatus.NO_CONTENT,
    )

    await mock_provider.delete(file_wb_path)

    assert aiohttpretty.has_call(method='DELETE', uri=url)
async def _metadata_object(self, path: WaterButlerPath, is_folder: bool = False) \
        -> typing.Union[GoogleCloudFileMetadata, GoogleCloudFolderMetadata]:
    """Fetch metadata for the object at ``path`` via a signed ``HEAD`` request.

    API docs:

        GET Object: https://cloud.google.com/storage/docs/xml-api/get-object
        HEAD Object: https://cloud.google.com/storage/docs/xml-api/head-object

    .. note::

        ``HEAD`` is used instead of ``GET``: per Google, a ``GET`` for non-ACL
        metadata incurs egress charges for downloading the whole object.

    .. note::

        The ``is_folder`` flag must be accurate — passing the wrong object type
        always fails.  This holds for many internal helpers of this class, which
        are not exposed to outside callers, including the parent classes.

    :param path: the WaterButlerPath of the object
    :type path: :class:`.WaterButlerPath`
    :param bool is_folder: whether the object is a file or folder
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: :class:`.GoogleCloudFolderMetadata`
    """
    verb = 'HEAD'
    obj_name = utils.get_obj_name(path, is_folder=is_folder)
    signed_url = functools.partial(self._build_and_sign_url, verb, obj_name)

    resp = await self.make_request(verb, signed_url,
                                   expects=(HTTPStatus.OK,),
                                   throws=MetadataError)
    await resp.release()

    # Folder and file metadata are parsed from the same response headers by
    # their respective classes.
    metadata_cls = GoogleCloudFolderMetadata if is_folder else GoogleCloudFileMetadata
    return metadata_cls.new_from_resp_headers(obj_name, resp.headers)
async def test_metadata_object_404_not_found(self, mock_time, mock_provider, file_wb_path):
    """A 404 on the metadata HEAD raises ``MetadataError`` with code NOT_FOUND."""
    name = utils.get_obj_name(file_wb_path, is_folder=False)
    head_url = mock_provider._build_and_sign_url('HEAD', name)
    aiohttpretty.register_uri(
        'HEAD',
        head_url,
        status=HTTPStatus.NOT_FOUND,
    )

    with pytest.raises(exceptions.MetadataError) as exc:
        await mock_provider._metadata_object(file_wb_path, is_folder=False)

    assert exc.value.code == HTTPStatus.NOT_FOUND
    assert aiohttpretty.has_call(method='HEAD', uri=head_url)
async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path,
                                               display_name_arg, expected_name):
    """With ``accept_url=True`` and a display name, download returns a signed URL
    whose disposition uses the expected (possibly defaulted) name.
    """
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    disposition = ('attachment; filename="{}"; '
                   'filename*=UTF-8\'\'{}').format(expected_name, expected_name)
    expected_url = mock_provider._build_and_sign_url(
        'GET', obj_name, **{'response-content-disposition': disposition})

    returned = await mock_provider.download(file_wb_path, accept_url=True,
                                            display_name=display_name_arg)

    assert not aiohttpretty.has_call(method='GET', uri=expected_url)
    assert isinstance(returned, str)
    assert expected_url == returned
async def test_metadata_object_404_not_found(self, mock_time, mock_provider, file_wb_path):
    """Metadata lookup for a missing object fails with a NOT_FOUND MetadataError."""
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    url = mock_provider._build_and_sign_url('HEAD', obj_name)
    aiohttpretty.register_uri('HEAD', url, status=HTTPStatus.NOT_FOUND)

    with pytest.raises(exceptions.MetadataError) as exc:
        await mock_provider._metadata_object(file_wb_path, is_folder=False)

    assert exc.value.code == HTTPStatus.NOT_FOUND
    assert aiohttpretty.has_call(method='HEAD', uri=url)
async def _metadata_object(self, path: WaterButlerPath, is_folder: bool = False) \
        -> typing.Union[GoogleCloudFileMetadata, GoogleCloudFolderMetadata]:
    """Return metadata for the object at ``path`` using a signed ``HEAD`` request.

    API docs:

        GET Object: https://cloud.google.com/storage/docs/xml-api/get-object
        HEAD Object: https://cloud.google.com/storage/docs/xml-api/head-object

    .. note::

        ``HEAD`` is preferred over ``GET`` because a ``GET`` object request for
        non-ACL metadata incurs egress charges for the entire object.

    .. note::

        ``is_folder`` must match the actual object type; a mismatch always fails.
        The same rule applies to this class's other internal helpers, none of
        which are exposed outside, including to the parent classes.

    :param path: the WaterButlerPath of the object
    :type path: :class:`.WaterButlerPath`
    :param bool is_folder: whether the object is a file or folder
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: :class:`.GoogleCloudFolderMetadata`
    """
    object_name = utils.get_obj_name(path, is_folder=is_folder)
    url_builder = functools.partial(self._build_and_sign_url, 'HEAD', object_name)

    resp = await self.make_request(
        'HEAD',
        url_builder,
        expects=(HTTPStatus.OK,),
        throws=MetadataError,
    )
    await resp.release()

    if is_folder:
        return GoogleCloudFolderMetadata.new_from_resp_headers(object_name, resp.headers)
    return GoogleCloudFileMetadata.new_from_resp_headers(object_name, resp.headers)
async def download(self, path: WaterButlerPath, accept_url=False, range=None,  # type: ignore
                   **kwargs) -> typing.Union[str, ResponseStreamReader]:
    """Download the object at ``path``.

    API Docs:

        GET Object: https://cloud.google.com/storage/docs/xml-api/get-object
        Download an Object: https://cloud.google.com/storage/docs/xml-api/get-object-download

    When ``accept_url`` is falsy, a standard signed request is made and a
    ``ResponseStreamReader`` is returned.  When truthy, a signed ``GET`` URL with a
    ``response-content-disposition`` query parameter is returned instead so the
    download carries the display name.

    :param path: the WaterButlerPath for the object to download
    :type path: :class:`.WaterButlerPath`
    :param bool accept_url: should return a direct time-limited download url from the provider
    :param tuple range: the Range HTTP request header
    :param dict kwargs: ``display_name`` - the display name of the file on OSF and for download
    :rtype: str or :class:`.streams.ResponseStreamReader`
    """
    if path.is_folder:
        raise DownloadError('Cannot download folders', code=HTTPStatus.BAD_REQUEST)

    object_name = utils.get_obj_name(path, is_folder=False)

    if accept_url:
        # Fall back to the path's own name when no display name was provided.
        name = kwargs.get('display_name') or path.name
        disposition_query = {'response-content-disposition': make_disposition(name)}
        # There is no need to delay URL building and signing
        return self._build_and_sign_url('GET', object_name, **disposition_query)  # type: ignore

    url_builder = functools.partial(self._build_and_sign_url, 'GET', object_name)
    resp = await self.make_request(
        'GET',
        url_builder,
        range=range,
        expects=(HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT),
        throws=DownloadError,
    )
    return ResponseStreamReader(resp)
async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_raw):
    """A plain download streams the object body via a signed GET."""
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    get_url = mock_provider._build_and_sign_url('GET', obj_name)
    aiohttpretty.register_uri('GET', get_url, body=file_raw, status=HTTPStatus.OK)

    reader = await mock_provider.download(file_wb_path)
    body = await reader.read()

    assert aiohttpretty.has_call(method='GET', uri=get_url)
    assert isinstance(reader, ResponseStreamReader)
    assert body == file_raw
async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_raw):
    """Downloading without ``accept_url`` returns a stream whose contents match
    the registered body.
    """
    name = utils.get_obj_name(file_wb_path, is_folder=False)
    url = mock_provider._build_and_sign_url('GET', name)
    aiohttpretty.register_uri(
        'GET',
        url,
        body=file_raw,
        status=HTTPStatus.OK,
    )

    stream = await mock_provider.download(file_wb_path)
    content = await stream.read()

    assert aiohttpretty.has_call(method='GET', uri=url)
    assert isinstance(stream, ResponseStreamReader)
    assert content == file_raw
async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path,
                                               display_name_arg, expected_name):
    """Download with ``accept_url`` and a display-name kwarg returns the signed
    URL built from the expected name; no HTTP call is made.
    """
    name = utils.get_obj_name(file_wb_path, is_folder=False)
    query = {
        'response-content-disposition': ('attachment; filename="{}"; '
                                         'filename*=UTF-8\'\'{}').format(expected_name,
                                                                         expected_name)
    }
    url = mock_provider._build_and_sign_url('GET', name, **query)

    result = await mock_provider.download(file_wb_path, accept_url=True,
                                          display_name=display_name_arg)

    assert not aiohttpretty.has_call(method='GET', uri=url)
    assert isinstance(result, str)
    assert url == result
async def test_metadata_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                             meta_file_parsed):
    """``_metadata_object`` for a file parses the HEAD headers into metadata
    equal to the parsed fixture.
    """
    obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
    head_url = mock_provider._build_and_sign_url('HEAD', obj_name)
    head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
    aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

    expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))

    metadata = await mock_provider._metadata_object(file_wb_path, is_folder=False)

    assert isinstance(metadata, GoogleCloudFileMetadata)
    assert metadata == expected
async def test_metadata_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                             meta_file_parsed):
    """File metadata retrieved via HEAD equals the parsed metadata fixture."""
    name = utils.get_obj_name(file_wb_path, is_folder=False)
    url = mock_provider._build_and_sign_url('HEAD', name)
    aiohttpretty.register_uri(
        'HEAD',
        url,
        headers=utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw))),
        status=HTTPStatus.OK,
    )

    metadata_expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))
    metadata = await mock_provider._metadata_object(file_wb_path, is_folder=False)

    assert isinstance(metadata, GoogleCloudFileMetadata)
    assert metadata == metadata_expected
async def upload(self, stream: BaseStream, path: WaterButlerPath, *args,
                 **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
    """Upload a file stream to the given WaterButlerPath.

    API docs:

        PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object
        Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

    The upload response body is empty and lacks ``Last-Modified``; its
    ``Content-Type`` describes the response, not the uploaded file, so a follow-up
    metadata request is required.  The ``etag`` header equals the hex digest of the
    file's MD5, which is used to verify the upload checksum.

    As with Amazon S3, ``skip_auto_headers={'Content-Type'}`` must be passed to
    :meth:`.BaseProvider.make_request()` because ``Content-Type`` is part of the
    "String To Sign"; without it the signed request fails with ``HTTP 403
    Forbidden`` / ``SignatureDoesNotMatch``.

    :param stream: the stream to post
    :type stream: :class:`.streams.BaseStream`
    :param path: the WaterButlerPath of the file to upload
    :type path: :class:`.WaterButlerPath`
    :param list args: additional args are ignored
    :param dict kwargs: additional kwargs are ignored
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: bool
    """
    # Determine "created vs. overwritten" before writing anything.
    created = not await self.exists(path)

    # Hash the stream as it is sent so the checksum can be verified afterwards.
    stream.add_writer('md5', HashStreamWriter(hashlib.md5))

    verb = 'PUT'
    object_name = utils.get_obj_name(path, is_folder=False)
    url_builder = functools.partial(self._build_and_sign_url, verb, object_name)

    resp = await self.make_request(
        verb,
        url_builder,
        data=stream,
        skip_auto_headers={'Content-Type'},
        headers={'Content-Length': str(stream.size)},
        expects=(HTTPStatus.OK,),
        throws=UploadError,
    )
    await resp.release()

    etag = resp.headers.get('etag', None)
    if not etag:
        raise UploadError('Missing response header "ETag" for upload.')
    if etag.strip('"') != stream.writers['md5'].hexdigest:
        raise UploadChecksumMismatchError()

    metadata = await self._metadata_object(path, is_folder=False)
    return metadata, created  # type: ignore
async def upload(self, stream: BaseStream, path: WaterButlerPath,
                 *args, **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
    """Upload a file stream to the given WaterButlerPath.

    API docs:

        PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object
        Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

    The response body is empty, has no ``Last-Modified``, and its ``Content-Type``
    is the response's own, not the uploaded file's — hence the extra metadata
    request after a successful upload.  The XML API's ``etag`` header is exactly
    the MD5 hex digest and is used to verify the upload checksum.

    ``skip_auto_headers={'Content-Type'}`` must be set on
    :meth:`.BaseProvider.make_request()` (as for Amazon S3): ``Content-Type`` is
    part of the "String To Sign", and auto headers would make the signed request
    fail with ``HTTP 403 Forbidden`` / ``SignatureDoesNotMatch``.

    :param stream: the stream to post
    :type stream: :class:`.streams.BaseStream`
    :param path: the WaterButlerPath of the file to upload
    :type path: :class:`.WaterButlerPath`
    :param list args: additional args are ignored
    :param dict kwargs: additional kwargs are ignored
    :rtype: :class:`.GoogleCloudFileMetadata`
    :rtype: bool
    """
    created = not await self.exists(path)
    stream.add_writer('md5', HashStreamWriter(hashlib.md5))

    obj_name = utils.get_obj_name(path, is_folder=False)
    signed_url = functools.partial(self._build_and_sign_url, 'PUT', obj_name)
    upload_headers = {'Content-Length': str(stream.size)}

    resp = await self.make_request('PUT', signed_url, data=stream,
                                   skip_auto_headers={'Content-Type'},
                                   headers=upload_headers,
                                   expects=(HTTPStatus.OK,),
                                   throws=UploadError)
    await resp.release()

    header_etag = resp.headers.get('etag', None)
    if not header_etag:
        raise UploadError('Missing response header "ETag" for upload.')
    # etag (sans quotes) must equal the MD5 hex digest of what was streamed.
    if header_etag.strip('"') != stream.writers['md5'].hexdigest:
        raise UploadChecksumMismatchError()

    metadata = await self._metadata_object(path, is_folder=False)
    return metadata, created  # type: ignore