예제 #1
0
    async def _intra_copy_file(
        self, dest_provider: BaseProvider, source_path: WaterButlerPath,
        dest_path: WaterButlerPath
    ) -> typing.Tuple[GoogleCloudFileMetadata, bool]:  # noqa
        """Copy one file object to another name in the same bucket, replacing the destination
        if it is already there.

        API docs: https://cloud.google.com/storage/docs/xml-api/put-object-copy

        The copy is a body-less ``PUT`` on the destination object whose ``x-goog-copy-source``
        header names the source as ``<bucket>/<object name>``.  The same header dict is also
        handed to the URL signer as the canonical extension headers.

        .. note::

            The copy response itself is released unread.  The returned metadata comes from a
            separate metadata request on the destination made after the copy.

        :param dest_provider: the provider that is asked whether ``dest_path`` already exists
        :type dest_provider: :class:`.BaseProvider`
        :param source_path: the WaterButlerPath of the object to copy from
        :type source_path: :class:`.WaterButlerPath`
        :param dest_path: the WaterButlerPath of the object to copy to
        :type dest_path: :class:`.WaterButlerPath`
        :returns: ``(metadata, created)`` where ``created`` is ``True`` if the destination did
            not exist before the copy
        :rtype: tuple
        """

        # Probe the destination first so the flag describes the state before the copy.
        is_new = not (await dest_provider.exists(dest_path))

        http_verb = 'PUT'
        copy_source_header = {
            'x-goog-copy-source': '{}/{}'.format(
                self.bucket,
                utils.get_obj_name(source_path, is_folder=False),
            ),
        }
        request_headers = {'Content-Length': '0', 'Content-Type': '', **copy_source_header}

        # URL building and signing is deferred: ``make_request`` receives a callable.
        url_factory = functools.partial(
            self._build_and_sign_url,
            http_verb,
            utils.get_obj_name(dest_path, is_folder=False),
            canonical_ext_headers=copy_source_header,
        )

        copy_resp = await self.make_request(
            http_verb,
            url_factory,
            headers=request_headers,
            expects=(HTTPStatus.OK,),
            throws=CopyError,
        )
        await copy_resp.release()

        dest_metadata = await self._metadata_object(dest_path, is_folder=False)
        return dest_metadata, is_new  # type: ignore
예제 #2
0
    async def _intra_copy_file(
        self,
        dest_provider: BaseProvider,
        source_path: WaterButlerPath,
        dest_path: WaterButlerPath,
    ) -> typing.Tuple[GoogleCloudFileMetadata, bool]:  # noqa
        """Server-side copy of a file within this Google Cloud Storage bucket.

        API docs: https://cloud.google.com/storage/docs/xml-api/put-object-copy

        An existing destination object is overwritten.  The request is an empty ``PUT`` to the
        destination, carrying the source in the ``x-goog-copy-source`` header (formatted as
        ``<bucket>/<object name>``); that header is also passed to the signer as a canonical
        extension header.

        .. note::

            The destination metadata is obtained with a follow-up metadata request instead of
            being built from the copy response.

        :param dest_provider: the provider used to check whether the destination already exists
        :type dest_provider: :class:`.BaseProvider`
        :param source_path: the source WaterButlerPath
        :type source_path: :class:`.WaterButlerPath`
        :param dest_path: the destination WaterButlerPath
        :type dest_path: :class:`.WaterButlerPath`
        :returns: the destination file metadata and a flag that is ``True`` when the file was
            newly created and ``False`` when an existing file was overwritten
        :rtype: tuple
        """

        # Must happen before the PUT; afterwards the destination always exists.
        created = not (await dest_provider.exists(dest_path))

        method = 'PUT'
        source_name = utils.get_obj_name(source_path, is_folder=False)
        ext_headers = {'x-goog-copy-source': '{}/{}'.format(self.bucket, source_name)}
        all_headers = {'Content-Length': '0', 'Content-Type': '', **ext_headers}

        target_name = utils.get_obj_name(dest_path, is_folder=False)
        build_url = functools.partial(
            self._build_and_sign_url, method, target_name, canonical_ext_headers=ext_headers
        )

        response = await self.make_request(
            method, build_url, headers=all_headers, expects=(HTTPStatus.OK,), throws=CopyError
        )
        await response.release()

        return await self._metadata_object(dest_path, is_folder=False), created  # type: ignore
예제 #3
0
    async def test_upload_file_checksum_mismatch(self, mock_time, mock_provider, file_wb_path,
                                                 meta_file_raw, meta_file_upload_raw,
                                                 file_stream_file):
        """Upload against a mocked ``PUT`` response whose ``etag`` header is overridden and
        expect ``UploadChecksumMismatchError``.

        Both the ``PUT`` (upload) and the ``HEAD`` (metadata) requests are mocked, and both are
        asserted to have been made.
        """
        obj_name = utils.get_obj_name(file_wb_path, is_folder=False)

        upload_url = mock_provider._build_and_sign_url('PUT', obj_name)
        # Start from the recorded upload headers, then force a specific ``etag`` value.
        upload_headers = dict(json.loads(meta_file_upload_raw))
        upload_headers['etag'] = '"9e780e1c4ee28c44642160b349b3aab0"'
        aiohttpretty.register_uri(
            'PUT',
            upload_url,
            headers=utils.get_multi_dict_from_python_dict(upload_headers),
            status=HTTPStatus.OK,
        )

        metadata_url = mock_provider._build_and_sign_url('HEAD', obj_name)
        metadata_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
        aiohttpretty.register_uri(
            'HEAD',
            metadata_url,
            headers=metadata_headers,
            status=HTTPStatus.OK,
        )

        with pytest.raises(exceptions.UploadChecksumMismatchError) as exc_info:
            await mock_provider.upload(file_stream_file, file_wb_path)

        assert exc_info.value.code == HTTPStatus.INTERNAL_SERVER_ERROR
        assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
        assert aiohttpretty.has_call(method='PUT', uri=upload_url)
예제 #4
0
    async def _delete_file(self, path: WaterButlerPath) -> None:
        """Remove a single file object identified by the given WaterButlerPath.

        API docs: https://cloud.google.com/storage/docs/xml-api/delete-object

        The request is a signed ``DELETE``.  ``make_request`` is told to expect
        ``HTTP 204 No Content`` and to throw ``DeleteError``.  A ``DELETE`` for an object that
        does not exist comes back as ``HTTP 404 Not Found`` with an error message containing
        ``NoSuchKey``.

        :param path: the WaterButlerPath of the file to delete
        :type path: :class:`.WaterButlerPath`
        :rtype: None
        """

        http_verb = 'DELETE'
        # URL building and signing is deferred: ``make_request`` receives a callable.
        url_factory = functools.partial(
            self._build_and_sign_url,
            http_verb,
            utils.get_obj_name(path, is_folder=False),
        )

        delete_resp = await self.make_request(
            http_verb, url_factory, expects=(HTTPStatus.NO_CONTENT,), throws=DeleteError
        )
        await delete_resp.release()
    async def test_upload_file_checksum_mismatch(
        self, mock_time, mock_provider, file_wb_path, meta_file_raw, meta_file_upload_raw,
        file_stream_file
    ):
        """``upload`` is expected to raise ``UploadChecksumMismatchError`` (code 500) when the
        mocked ``PUT`` response carries the overridden ``etag`` set below.

        The ``PUT`` and ``HEAD`` requests are both mocked and both must have been called.
        """
        name = utils.get_obj_name(file_wb_path, is_folder=False)

        put_url = mock_provider._build_and_sign_url('PUT', name)
        # Recorded upload headers with the ``etag`` entry replaced by a fixed value.
        put_headers = dict(
            json.loads(meta_file_upload_raw),
            etag='"9e780e1c4ee28c44642160b349b3aab0"',
        )
        aiohttpretty.register_uri(
            'PUT', put_url,
            headers=utils.get_multi_dict_from_python_dict(put_headers),
            status=HTTPStatus.OK
        )

        head_url = mock_provider._build_and_sign_url('HEAD', name)
        head_headers = dict(json.loads(meta_file_raw))
        aiohttpretty.register_uri(
            'HEAD', head_url,
            headers=utils.get_multi_dict_from_python_dict(head_headers),
            status=HTTPStatus.OK
        )

        with pytest.raises(exceptions.UploadChecksumMismatchError) as raised:
            await mock_provider.upload(file_stream_file, file_wb_path)

        assert raised.value.code == HTTPStatus.INTERNAL_SERVER_ERROR
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
        assert aiohttpretty.has_call(method='PUT', uri=put_url)
예제 #6
0
    async def test_upload_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                               meta_file_parsed, meta_file_upload_raw, file_stream_file):
        """Upload a file against mocked ``PUT`` and ``HEAD`` responses and compare the returned
        metadata with the one built from the parsed fixture."""
        obj_name = utils.get_obj_name(file_wb_path, is_folder=False)

        upload_url = mock_provider._build_and_sign_url('PUT', obj_name)
        upload_headers = dict(json.loads(meta_file_upload_raw))
        aiohttpretty.register_uri(
            'PUT',
            upload_url,
            headers=utils.get_multi_dict_from_python_dict(upload_headers),
            status=HTTPStatus.OK,
        )

        metadata_url = mock_provider._build_and_sign_url('HEAD', obj_name)
        metadata_headers = dict(json.loads(meta_file_raw))
        aiohttpretty.register_uri(
            'HEAD',
            metadata_url,
            headers=utils.get_multi_dict_from_python_dict(metadata_headers),
            status=HTTPStatus.OK,
        )

        expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))

        # The second element of the result is not checked by this test.
        actual, _ = await mock_provider.upload(file_stream_file, file_wb_path)

        assert actual == expected
        assert aiohttpretty.has_call(method='PUT', uri=upload_url)
        assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
    async def test_upload_file(
        self, mock_time, mock_provider, file_wb_path, meta_file_raw, meta_file_parsed,
        meta_file_upload_raw, file_stream_file
    ):
        """Successful upload: the metadata returned by ``upload`` must equal the expected
        ``GoogleCloudFileMetadata``, and both mocked requests must have been made."""
        name = utils.get_obj_name(file_wb_path, is_folder=False)

        put_url = mock_provider._build_and_sign_url('PUT', name)
        put_headers = utils.get_multi_dict_from_python_dict(
            dict(json.loads(meta_file_upload_raw))
        )
        aiohttpretty.register_uri('PUT', put_url, headers=put_headers, status=HTTPStatus.OK)

        head_url = mock_provider._build_and_sign_url('HEAD', name)
        head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
        aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

        parsed = json.loads(meta_file_parsed)
        want = GoogleCloudFileMetadata(parsed)

        result = await mock_provider.upload(file_stream_file, file_wb_path)
        # ``upload`` returns a pair; only the metadata half is compared.
        got, _ = result

        assert got == want
        assert aiohttpretty.has_call(method='PUT', uri=put_url)
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
예제 #8
0
    async def _delete_file(self, path: WaterButlerPath) -> None:
        """Delete the file object at ``path`` with a signed ``DELETE`` request.

        API docs: https://cloud.google.com/storage/docs/xml-api/delete-object

        The only accepted response status is ``HTTP 204 No Content``; ``DeleteError`` is the
        exception type handed to ``make_request``.  Deleting an object that does not exist
        yields ``HTTP 404 Not Found`` with ``NoSuchKey`` in the error message.

        :param path: the WaterButlerPath of the file to delete
        :type path: :class:`.WaterButlerPath`
        :rtype: None
        """

        method = 'DELETE'
        target_name = utils.get_obj_name(path, is_folder=False)
        # ``make_request`` gets a callable so the URL is built and signed when it is needed.
        build_url = functools.partial(self._build_and_sign_url, method, target_name)

        response = await self.make_request(
            method,
            build_url,
            throws=DeleteError,
            expects=(HTTPStatus.NO_CONTENT,),
        )
        await response.release()
예제 #9
0
    def test_path_and_obj_name_for_file(self, file_obj_name, file_wb_path):
        """Check both directions for a file: WaterButlerPath -> object name via
        ``get_obj_name`` and object name -> path string via ``build_path``."""
        # Path to object name, using the default ``is_folder``.
        assert utils.get_obj_name(file_wb_path) == file_obj_name

        # Object name back to a path string, which carries a leading slash.
        expected_path = '/' + file_wb_path.path
        assert utils.build_path(file_obj_name) == expected_path
예제 #10
0
    def test_path_and_obj_name_for_folder(self, folder_obj_name, folder_wb_path):
        """Check both directions for a folder: WaterButlerPath -> object name via
        ``get_obj_name`` and object name -> path string via ``build_path``, each called with
        ``is_folder=True``."""
        # Path to object name.
        assert utils.get_obj_name(folder_wb_path, is_folder=True) == folder_obj_name

        # Object name back to a path string, which carries a leading slash.
        expected_path = '/' + folder_wb_path.path
        assert utils.build_path(folder_obj_name, is_folder=True) == expected_path
예제 #11
0
    async def download(
            self,
            path: WaterButlerPath,
            accept_url=False,
            range=None,  # type: ignore
            **kwargs) -> typing.Union[str, ResponseStreamReader]:
        """Download the object with the given path.

        API Docs:

            GET Object: https://cloud.google.com/storage/docs/xml-api/get-object

            Download an Object: https://cloud.google.com/storage/docs/xml-api/get-object-download

        The behavior of download differs depending on the value of ``accept_url``.  If
        ``accept_url == False``, WB makes a standard signed request and returns a
        ``ResponseStreamReader``.  If ``accept_url == True``, WB builds and signs the ``GET``
        request with an extra query parameter ``response-content-disposition`` to trigger the
        download with the display name.  The signed URL is returned and no request is made.

        The disposition value has the form
        ``attachment; filename="<ascii name>"; filename*=UTF-8''<percent-encoded name>``.  The
        quoted ``filename`` is an ASCII-only fallback with backslashes and double quotes escaped
        and control characters removed; ``filename*`` carries the full name percent-encoded as
        UTF-8.  This keeps the header well-formed for names containing spaces, quotes,
        semicolons or non-ASCII characters.

        :param path: the WaterButlerPath for the object to download
        :type path: :class:`.WaterButlerPath`
        :param bool accept_url: should return a direct time-limited download url from the provider
        :param tuple range: the Range HTTP request header
        :param dict kwargs: ``display_name`` (or the legacy ``displayName``) - the name to offer
            for the download; a missing, ``None`` or empty value falls back to ``path.name``
        :rtype: str or :class:`.streams.ResponseStreamReader`
        :raises: :class:`.DownloadError` with ``HTTP 400 Bad Request`` if ``path`` is a folder
        """

        if path.is_folder:
            raise DownloadError('Cannot download folders', code=HTTPStatus.BAD_REQUEST)

        req_method = 'GET'
        obj_name = utils.get_obj_name(path, is_folder=False)

        if accept_url:
            # Imported locally so this method does not depend on a module-level import.
            from urllib.parse import quote

            # Accept both spellings of the keyword; ``or`` (rather than a ``get`` default) makes
            # an explicit ``None`` or empty string fall back to the file's own name.
            display_name = kwargs.get('display_name') or kwargs.get('displayName') or path.name

            # ASCII fallback for the quoted-string form: drop anything that cannot be encoded,
            # strip control characters and escape the two characters that are special inside a
            # quoted string.
            fallback_name = display_name.encode('ascii', 'ignore').decode('ascii')
            fallback_name = ''.join(ch for ch in fallback_name if ch.isprintable())
            fallback_name = fallback_name.replace('\\', '\\\\').replace('"', '\\"')

            query = {
                'response-content-disposition': (
                    'attachment; filename="{}"; filename*=UTF-8\'\'{}'
                ).format(fallback_name, quote(display_name, safe=''))
            }
            # There is no need to delay URL building and signing
            signed_url = self._build_and_sign_url(req_method, obj_name, **query)  # type: ignore
            return signed_url

        # ``make_request`` gets a callable so the URL is built and signed when it is needed.
        signed_url = functools.partial(self._build_and_sign_url, req_method, obj_name)
        resp = await self.make_request(
            req_method,
            signed_url,
            range=range,
            expects=(HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT),
            throws=DownloadError
        )
        return ResponseStreamReader(resp)
예제 #12
0
    async def test_intra_copy_file(self, mock_time, mock_provider, file_wb_path, file_2_wb_path,
                                   meta_file_raw, meta_file_parsed, meta_file_copy_raw):
        """Copy ``file_2_wb_path`` onto ``file_wb_path`` within the same provider and compare
        the returned metadata with the one built from the parsed fixture.

        The copy ``PUT`` (signed with the ``x-goog-copy-source`` extension header) and the
        metadata ``HEAD`` on the destination are both mocked and must both have been called.
        """
        source, destination = file_2_wb_path, file_wb_path
        source_name = utils.get_obj_name(source, is_folder=False)
        destination_name = utils.get_obj_name(destination, is_folder=False)

        copy_source = '{}/{}'.format(mock_provider.bucket, source_name)
        copy_url = mock_provider._build_and_sign_url(
            'PUT',
            destination_name,
            canonical_ext_headers={'x-goog-copy-source': copy_source},
        )
        copy_headers = dict(json.loads(meta_file_copy_raw))
        aiohttpretty.register_uri(
            'PUT',
            copy_url,
            headers=utils.get_multi_dict_from_python_dict(copy_headers),
            status=HTTPStatus.OK,
        )

        metadata_url = mock_provider._build_and_sign_url('HEAD', destination_name)
        metadata_headers = dict(json.loads(meta_file_raw))
        aiohttpretty.register_uri(
            'HEAD',
            metadata_url,
            headers=utils.get_multi_dict_from_python_dict(metadata_headers),
            status=HTTPStatus.OK,
        )

        expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))

        # The second element of the result is not checked by this test.
        actual, _ = await mock_provider.intra_copy(mock_provider, source, destination)

        assert actual == expected
        assert aiohttpretty.has_call(method='PUT', uri=copy_url)
        assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
예제 #13
0
    async def test_intra_copy_file_not_found(self, mock_time, mock_provider, file_wb_path,
                                             file_2_wb_path, meta_file_raw, meta_file_copy_raw):
        """``intra_copy`` must raise ``CopyError`` with code 404 when the mocked copy ``PUT``
        answers ``HTTP 404 Not Found``.

        The ``HEAD`` on the destination is mocked with ``200 OK`` and is also asserted to have
        been called.
        """
        source, destination = file_2_wb_path, file_wb_path
        source_name = utils.get_obj_name(source, is_folder=False)
        destination_name = utils.get_obj_name(destination, is_folder=False)

        copy_source = '{}/{}'.format(mock_provider.bucket, source_name)
        copy_url = mock_provider._build_and_sign_url(
            'PUT',
            destination_name,
            canonical_ext_headers={'x-goog-copy-source': copy_source},
        )
        copy_headers = dict(json.loads(meta_file_copy_raw))
        aiohttpretty.register_uri(
            'PUT',
            copy_url,
            headers=utils.get_multi_dict_from_python_dict(copy_headers),
            status=HTTPStatus.NOT_FOUND,
        )

        metadata_url = mock_provider._build_and_sign_url('HEAD', destination_name)
        metadata_headers = dict(json.loads(meta_file_raw))
        aiohttpretty.register_uri(
            'HEAD',
            metadata_url,
            headers=utils.get_multi_dict_from_python_dict(metadata_headers),
            status=HTTPStatus.OK,
        )

        with pytest.raises(exceptions.CopyError) as exc_info:
            await mock_provider.intra_copy(mock_provider, source, destination)

        assert exc_info.value.code == HTTPStatus.NOT_FOUND
        assert aiohttpretty.has_call(method='PUT', uri=copy_url)
        assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
    async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path):
        """With ``accept_url=True``, ``download`` must return the signed ``GET`` URL as a
        string, including the expected ``response-content-disposition`` query parameter,
        without that URL having been requested."""
        disposition = ('attachment; filename="text-file-1.txt"; '
                       'filename*=UTF-8\'\'text-file-1.txt')
        expected_url = mock_provider._build_and_sign_url(
            'GET',
            utils.get_obj_name(file_wb_path, is_folder=False),
            **{'response-content-disposition': disposition}
        )

        actual_url = await mock_provider.download(file_wb_path, accept_url=True)

        # No HTTP call: the URL is only built and handed back.
        assert not aiohttpretty.has_call(method='GET', uri=expected_url)
        assert isinstance(actual_url, str)
        assert expected_url == actual_url
    async def test_intra_copy_file(
        self, mock_time, mock_provider, file_wb_path, file_2_wb_path, meta_file_raw,
        meta_file_parsed, meta_file_copy_raw
    ):
        """Successful intra-provider copy from ``file_2_wb_path`` to ``file_wb_path``: the
        returned metadata must equal the expected ``GoogleCloudFileMetadata`` and both the
        mocked ``PUT`` and ``HEAD`` requests must have been made."""
        src_path = file_2_wb_path
        dst_path = file_wb_path
        src_name = utils.get_obj_name(src_path, is_folder=False)
        dst_name = utils.get_obj_name(dst_path, is_folder=False)

        # The copy request is signed with the source given as ``<bucket>/<object name>``.
        ext_headers = {'x-goog-copy-source': '{}/{}'.format(mock_provider.bucket, src_name)}
        put_url = mock_provider._build_and_sign_url(
            'PUT', dst_name, canonical_ext_headers=ext_headers
        )
        put_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw)))
        aiohttpretty.register_uri('PUT', put_url, headers=put_headers, status=HTTPStatus.OK)

        head_url = mock_provider._build_and_sign_url('HEAD', dst_name)
        head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
        aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

        parsed = json.loads(meta_file_parsed)
        want = GoogleCloudFileMetadata(parsed)

        result = await mock_provider.intra_copy(mock_provider, src_path, dst_path)
        # ``intra_copy`` returns a pair; only the metadata half is compared.
        got, _ = result

        assert got == want
        assert aiohttpretty.has_call(method='PUT', uri=put_url)
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
예제 #16
0
    async def test_delete_file(self, mock_time, mock_provider, file_wb_path):
        """Delete a file against a mocked ``DELETE`` that answers ``HTTP 204 No Content`` and
        check that the signed URL was called."""
        delete_url = mock_provider._build_and_sign_url(
            'DELETE',
            utils.get_obj_name(file_wb_path, is_folder=False),
        )
        aiohttpretty.register_uri(
            'DELETE',
            delete_url,
            status=HTTPStatus.NO_CONTENT,
        )

        await mock_provider.delete(file_wb_path)

        assert aiohttpretty.has_call(method='DELETE', uri=delete_url)
    async def test_intra_copy_file_not_found(
        self, mock_time, mock_provider, file_wb_path, file_2_wb_path, meta_file_raw,
        meta_file_copy_raw
    ):
        """A copy whose mocked ``PUT`` answers ``HTTP 404 Not Found`` is expected to surface as
        ``CopyError`` carrying that status code.

        The destination ``HEAD`` is mocked with ``200 OK``; both requests must have been made.
        """
        src_path = file_2_wb_path
        dst_path = file_wb_path
        src_name = utils.get_obj_name(src_path, is_folder=False)
        dst_name = utils.get_obj_name(dst_path, is_folder=False)

        # The copy request is signed with the source given as ``<bucket>/<object name>``.
        ext_headers = {'x-goog-copy-source': '{}/{}'.format(mock_provider.bucket, src_name)}
        put_url = mock_provider._build_and_sign_url(
            'PUT', dst_name, canonical_ext_headers=ext_headers
        )
        put_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_copy_raw)))
        aiohttpretty.register_uri(
            'PUT', put_url, headers=put_headers, status=HTTPStatus.NOT_FOUND
        )

        head_url = mock_provider._build_and_sign_url('HEAD', dst_name)
        head_headers = utils.get_multi_dict_from_python_dict(dict(json.loads(meta_file_raw)))
        aiohttpretty.register_uri('HEAD', head_url, headers=head_headers, status=HTTPStatus.OK)

        with pytest.raises(exceptions.CopyError) as raised:
            await mock_provider.intra_copy(mock_provider, src_path, dst_path)

        assert raised.value.code == HTTPStatus.NOT_FOUND
        assert aiohttpretty.has_call(method='PUT', uri=put_url)
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
    async def test_delete_file(self, mock_time, mock_provider, file_wb_path):
        """``delete`` on a file must issue the signed ``DELETE`` request, which is mocked here
        to answer ``HTTP 204 No Content``."""
        name = utils.get_obj_name(file_wb_path, is_folder=False)
        url = mock_provider._build_and_sign_url('DELETE', name)
        aiohttpretty.register_uri('DELETE', url, status=HTTPStatus.NO_CONTENT)

        await mock_provider.delete(file_wb_path)

        was_called = aiohttpretty.has_call(method='DELETE', uri=url)
        assert was_called
예제 #19
0
    async def _metadata_object(
        self, path: WaterButlerPath, is_folder: bool = False
    ) -> typing.Union[GoogleCloudFileMetadata, GoogleCloudFolderMetadata]:
        """Fetch the metadata of a single object and wrap it in the matching metadata class.

        API docs:

            GET Object: https://cloud.google.com/storage/docs/xml-api/get-object

            HEAD Object: https://cloud.google.com/storage/docs/xml-api/head-object

        .. note::

            The request is a ``HEAD``, not a ``GET``.  Google advises against using a ``GET``
            object request to retrieve only non-ACL metadata, because doing so incurs egress
            charges for downloading the entire object.

        .. note::

            ``is_folder`` decides both how the object name is derived from ``path`` and which
            metadata class is returned.  Providing the wrong type will always fail.

        The metadata is built from the response headers only.

        :param path: the WaterButlerPath of the object
        :type path: :class:`.WaterButlerPath`
        :param bool is_folder: whether the object is a folder (``True``) or a file (``False``)
        :rtype: :class:`.GoogleCloudFileMetadata` or :class:`.GoogleCloudFolderMetadata`
        """

        http_verb = 'HEAD'
        obj_name = utils.get_obj_name(path, is_folder=is_folder)
        # URL building and signing is deferred: ``make_request`` receives a callable.
        url_factory = functools.partial(self._build_and_sign_url, http_verb, obj_name)

        head_resp = await self.make_request(
            http_verb,
            url_factory,
            expects=(HTTPStatus.OK,),
            throws=MetadataError,
        )
        await head_resp.release()

        metadata_cls = GoogleCloudFolderMetadata if is_folder else GoogleCloudFileMetadata
        return metadata_cls.new_from_resp_headers(obj_name, head_resp.headers)
    async def test_metadata_object_404_not_found(self, mock_time, mock_provider, file_wb_path):
        """``_metadata_object`` must raise ``MetadataError`` with code 404 when the mocked
        ``HEAD`` answers ``HTTP 404 Not Found``."""
        head_url = mock_provider._build_and_sign_url(
            'HEAD',
            utils.get_obj_name(file_wb_path, is_folder=False),
        )
        aiohttpretty.register_uri('HEAD', head_url, status=HTTPStatus.NOT_FOUND)

        with pytest.raises(exceptions.MetadataError) as exc_info:
            await mock_provider._metadata_object(file_wb_path, is_folder=False)

        assert exc_info.value.code == HTTPStatus.NOT_FOUND
        assert aiohttpretty.has_call(method='HEAD', uri=head_url)
    async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path,
                                                   display_name_arg, expected_name):
        """With ``accept_url=True`` and a ``display_name`` argument, ``download`` must return
        the signed ``GET`` URL whose ``response-content-disposition`` uses ``expected_name``,
        without that URL having been requested."""
        disposition = ('attachment; filename="{}"; '
                       'filename*=UTF-8\'\'{}').format(expected_name, expected_name)
        expected_url = mock_provider._build_and_sign_url(
            'GET',
            utils.get_obj_name(file_wb_path, is_folder=False),
            **{'response-content-disposition': disposition}
        )

        actual_url = await mock_provider.download(
            file_wb_path,
            accept_url=True,
            display_name=display_name_arg,
        )

        # No HTTP call: the URL is only built and handed back.
        assert not aiohttpretty.has_call(method='GET', uri=expected_url)
        assert isinstance(actual_url, str)
        assert expected_url == actual_url
예제 #22
0
    async def test_metadata_object_404_not_found(self, mock_time, mock_provider, file_wb_path):
        """A ``HEAD`` mocked to answer ``HTTP 404 Not Found`` is expected to make
        ``_metadata_object`` raise ``MetadataError`` carrying that status code."""
        name = utils.get_obj_name(file_wb_path, is_folder=False)
        url = mock_provider._build_and_sign_url('HEAD', name)
        aiohttpretty.register_uri(
            'HEAD',
            url,
            status=HTTPStatus.NOT_FOUND,
        )

        with pytest.raises(exceptions.MetadataError) as raised:
            await mock_provider._metadata_object(file_wb_path, is_folder=False)

        assert raised.value.code == HTTPStatus.NOT_FOUND
        assert aiohttpretty.has_call(method='HEAD', uri=url)
예제 #23
0
    async def _metadata_object(
        self,
        path: WaterButlerPath,
        is_folder: bool = False,
    ) -> typing.Union[GoogleCloudFileMetadata, GoogleCloudFolderMetadata]:
        """Return the metadata for the object at ``path``, built from the headers of a signed
        ``HEAD`` request.

        API docs:

            GET Object: https://cloud.google.com/storage/docs/xml-api/get-object

            HEAD Object: https://cloud.google.com/storage/docs/xml-api/head-object

        .. note::

            ``HEAD`` is used on purpose.  Per Google, a ``GET`` object request should not be
            used to retrieve only non-ACL metadata because it incurs the egress charges of
            downloading the entire object; a ``HEAD`` object request should be used instead.

        .. note::

            The caller states explicitly whether the object is a file or a folder.  The flag
            selects the object name derivation and the metadata class; providing the wrong
            type will always fail.

        :param path: the WaterButlerPath of the object
        :type path: :class:`.WaterButlerPath`
        :param bool is_folder: ``True`` for a folder object, ``False`` for a file object
        :rtype: :class:`.GoogleCloudFolderMetadata` when ``is_folder`` is true, otherwise
            :class:`.GoogleCloudFileMetadata`
        """

        method = 'HEAD'
        target_name = utils.get_obj_name(path, is_folder=is_folder)
        # ``make_request`` gets a callable so the URL is built and signed when it is needed.
        build_url = functools.partial(self._build_and_sign_url, method, target_name)

        response = await self.make_request(
            method, build_url, expects=(HTTPStatus.OK,), throws=MetadataError
        )
        # Only the headers are needed, so the response is released right away.
        await response.release()

        if not is_folder:
            return GoogleCloudFileMetadata.new_from_resp_headers(target_name, response.headers)
        return GoogleCloudFolderMetadata.new_from_resp_headers(target_name, response.headers)
예제 #24
0
    async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path):
        """``download(..., accept_url=True)`` returns the signed URL and performs no ``GET``.

        No ``display_name`` is passed, so the expected ``response-content-disposition`` query
        value is the one spelled out below for ``text-file-1.txt``.
        """

        disposition = (
            'attachment; filename="text-file-1.txt"; '
            'filename*=UTF-8\'\'text-file-1.txt'
        )
        object_name = utils.get_obj_name(file_wb_path, is_folder=False)
        expected_url = mock_provider._build_and_sign_url(
            'GET', object_name, **{'response-content-disposition': disposition}
        )

        returned = await mock_provider.download(file_wb_path, accept_url=True)

        # Nothing was registered with aiohttpretty: the provider must not touch the network.
        assert not aiohttpretty.has_call(method='GET', uri=expected_url)
        assert isinstance(returned, str)
        assert expected_url == returned
예제 #25
0
    async def download(self, path: WaterButlerPath, accept_url=False, range=None,  # type: ignore
                       **kwargs) -> typing.Union[str, ResponseStreamReader]:
        """Download the file object at the given path, or hand back a signed URL for it.

        API Docs:

            GET Object: https://cloud.google.com/storage/docs/xml-api/get-object

            Download an Object: https://cloud.google.com/storage/docs/xml-api/get-object-download

        Two modes, selected by ``accept_url``:

        * ``accept_url == False``: WB performs a signed ``GET`` itself and wraps the response in a
          ``ResponseStreamReader``.
        * ``accept_url == True``: WB only builds and signs a ``GET`` URL that carries the extra
          query parameter ``response-content-disposition`` (so the download uses the display
          name) and returns that URL without making any request.

        :param path: the WaterButlerPath for the object to download
        :type path: :class:`.WaterButlerPath`
        :param bool accept_url: should return a direct time-limited download url from the provider
        :param tuple range: the Range HTTP request header
        :param dict kwargs: ``display_name`` - the display name of the file on OSF and for
            download; falls back to ``path.name`` when missing or empty
        :rtype: str or :class:`.streams.ResponseStreamReader`
        :raises: ``DownloadError`` with ``400 Bad Request`` if ``path`` is a folder
        """

        if path.is_folder:
            raise DownloadError('Cannot download folders', code=HTTPStatus.BAD_REQUEST)

        http_verb = 'GET'
        object_name = utils.get_obj_name(path, is_folder=False)

        if not accept_url:
            # Streaming mode: pass a callable so the URL is produced by ``make_request`` itself.
            url_builder = functools.partial(self._build_and_sign_url, http_verb, object_name)
            response = await self.make_request(
                http_verb,
                url_builder,
                range=range,
                expects=(HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT),
                throws=DownloadError
            )
            return ResponseStreamReader(response)

        # Direct-URL mode: no request is made here, so the URL is built and signed immediately.
        shown_name = kwargs.get('display_name') or path.name
        disposition = make_disposition(shown_name)
        return self._build_and_sign_url(  # type: ignore
            http_verb, object_name, **{'response-content-disposition': disposition}
        )
예제 #26
0
    async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_raw):
        """A plain ``download`` issues the signed ``GET`` and streams back the registered body."""

        http_verb = 'GET'
        object_name = utils.get_obj_name(file_wb_path, is_folder=False)
        get_url = mock_provider._build_and_sign_url(http_verb, object_name)
        aiohttpretty.register_uri(http_verb, get_url, body=file_raw, status=HTTPStatus.OK)

        reader = await mock_provider.download(file_wb_path)
        downloaded = await reader.read()

        assert aiohttpretty.has_call(method=http_verb, uri=get_url)
        assert isinstance(reader, ResponseStreamReader)
        assert downloaded == file_raw
    async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_raw):
        """Check the streaming download path end to end against a mocked ``GET``.

        The signed URL is registered to answer ``200 OK`` with ``file_raw``; the provider must
        call that URL, return a ``ResponseStreamReader`` and yield exactly the registered bytes.
        """

        target_name = utils.get_obj_name(file_wb_path, is_folder=False)
        mocked_url = mock_provider._build_and_sign_url('GET', target_name)

        mock_response = {'body': file_raw, 'status': HTTPStatus.OK}
        aiohttpretty.register_uri('GET', mocked_url, **mock_response)

        stream_reader = await mock_provider.download(file_wb_path)
        payload = await stream_reader.read()

        assert aiohttpretty.has_call(method='GET', uri=mocked_url)
        assert isinstance(stream_reader, ResponseStreamReader)
        assert payload == file_raw
예제 #28
0
    async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path,
                                                   display_name_arg, expected_name):
        """``display_name`` passed to ``download`` must end up in the signed URL's disposition.

        ``display_name_arg`` is what the caller supplies and ``expected_name`` is the file name
        expected in the resulting ``response-content-disposition`` value.  With
        ``accept_url=True`` only the URL is returned; no ``GET`` may be issued.
        """

        disposition_template = (
            'attachment; filename="{}"; '
            'filename*=UTF-8\'\'{}'
        )
        disposition = disposition_template.format(expected_name, expected_name)

        object_name = utils.get_obj_name(file_wb_path, is_folder=False)
        expected_url = mock_provider._build_and_sign_url(
            'GET', object_name, **{'response-content-disposition': disposition}
        )

        returned = await mock_provider.download(
            file_wb_path,
            accept_url=True,
            display_name=display_name_arg,
        )

        assert not aiohttpretty.has_call(method='GET', uri=expected_url)
        assert isinstance(returned, str)
        assert expected_url == returned
    async def test_metadata_file(self, mock_time, mock_provider, file_wb_path, meta_file_raw,
                                 meta_file_parsed):
        """``_metadata_object`` builds file metadata from the headers of a mocked ``HEAD``.

        ``meta_file_raw`` is JSON holding the response headers to serve; ``meta_file_parsed`` is
        JSON holding the data the expected ``GoogleCloudFileMetadata`` is constructed from.
        """

        http_verb = 'HEAD'
        object_name = utils.get_obj_name(file_wb_path, is_folder=False)
        head_url = mock_provider._build_and_sign_url(http_verb, object_name)

        raw_headers = dict(json.loads(meta_file_raw))
        aiohttpretty.register_uri(
            http_verb,
            head_url,
            headers=utils.get_multi_dict_from_python_dict(raw_headers),
            status=HTTPStatus.OK
        )

        expected = GoogleCloudFileMetadata(json.loads(meta_file_parsed))
        actual = await mock_provider._metadata_object(file_wb_path, is_folder=False)

        assert isinstance(actual, GoogleCloudFileMetadata)
        assert actual == expected
예제 #30
0
    async def test_metadata_file(self, mock_time, mock_provider, file_wb_path,
                                 meta_file_raw, meta_file_parsed):
        """Verify file metadata parsing for a successful ``HEAD`` object request.

        The signed ``HEAD`` URL is mocked to return ``200 OK`` with the headers decoded from
        ``meta_file_raw``.  The metadata returned by the provider must be a
        ``GoogleCloudFileMetadata`` equal to the one built from ``meta_file_parsed``.
        """

        target_name = utils.get_obj_name(file_wb_path, is_folder=False)
        mocked_url = mock_provider._build_and_sign_url('HEAD', target_name)

        header_dict = dict(json.loads(meta_file_raw))
        mocked_headers = utils.get_multi_dict_from_python_dict(header_dict)
        aiohttpretty.register_uri('HEAD', mocked_url, headers=mocked_headers, status=HTTPStatus.OK)

        parsed_json = json.loads(meta_file_parsed)
        wanted = GoogleCloudFileMetadata(parsed_json)

        got = await mock_provider._metadata_object(file_wb_path, is_folder=False)

        assert isinstance(got, GoogleCloudFileMetadata)
        assert got == wanted
예제 #31
0
    async def upload(self, stream: BaseStream, path: WaterButlerPath, *args,
                     **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
        """Upload a file stream to the object named by the given WaterButlerPath.

        API docs:

            PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object

            Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

        The ``PUT`` response has an empty body and lacks the ``Last-Modified`` header WB requires.
        Its ``Content-Type`` header describes the response itself, not the uploaded file.  For
        both reasons WB makes a separate metadata request once the upload has succeeded.

        Integrity check: the "etag" header returned by the XML API is exactly the hex-digest of
        the MD5 hash, so WB compares it (with surrounding double quotes stripped) against the MD5
        computed over the stream instead of parsing the hash headers.

        As with Amazon S3, ``skip_auto_headers={'Content-Type'}`` must be passed to
        :meth:`.BaseProvider.make_request()` because ``Content-Type`` is part of the "String To
        Sign".  Without it the signed request would fail with ``HTTP 403 Forbidden`` and the error
        message ``SignatureDoesNotMatch``.

        :param stream: the stream to post
        :type stream: :class:`.streams.BaseStream`
        :param path: the WaterButlerPath of the file to upload
        :type path: :class:`.WaterButlerPath`
        :param list args: additional args are ignored
        :param dict kwargs: additional kwargs are ignored
        :rtype: :class:`.GoogleCloudFileMetadata`
        :rtype: bool
        :raises: ``UploadError`` if the request fails or the response has no ``ETag`` header
        :raises: ``UploadChecksumMismatchError`` if the ``ETag`` differs from the stream's MD5
        """

        # Must be determined before the PUT: afterwards the object exists either way.
        created = not await self.exists(path)

        stream.add_writer('md5', HashStreamWriter(hashlib.md5))

        http_verb = 'PUT'
        object_name = utils.get_obj_name(path, is_folder=False)
        url_builder = functools.partial(self._build_and_sign_url, http_verb, object_name)
        request_headers = {'Content-Length': str(stream.size)}

        response = await self.make_request(
            http_verb,
            url_builder,
            data=stream,
            skip_auto_headers={'Content-Type'},
            headers=request_headers,
            expects=(HTTPStatus.OK,),
            throws=UploadError
        )
        await response.release()

        remote_etag = response.headers.get('etag')
        if not remote_etag:
            raise UploadError('Missing response header "ETag" for upload.')

        remote_md5 = remote_etag.strip('"')
        local_md5 = stream.writers['md5'].hexdigest
        if remote_md5 != local_md5:
            raise UploadChecksumMismatchError()

        uploaded_metadata = await self._metadata_object(path, is_folder=False)
        return uploaded_metadata, created  # type: ignore
예제 #32
0
    async def upload(self, stream: BaseStream, path: WaterButlerPath, *args,
                     **kwargs) -> typing.Tuple[GoogleCloudFileMetadata, bool]:
        """Send a file stream to Google Cloud Storage and return its metadata.

        API docs:

            PUT Object: https://cloud.google.com/storage/docs/xml-api/put-object

            Upload an Object: https://cloud.google.com/storage/docs/xml-api/put-object-upload

        Steps performed:

        1. Check whether the object already exists, to decide the ``created`` flag.
        2. Attach an MD5 hash writer to the stream and ``PUT`` the stream to a signed URL.
        3. Compare the response "etag" header with the MD5 hex-digest of the stream.  For the XML
           API the two are exactly the same, so WB does not parse the hash headers.
        4. Issue a metadata request.  The upload response has an empty body, does not have the
           required ``Last-Modified`` header, and its ``Content-Type`` header is for the response
           itself rather than for the uploaded file.

        Similarly to Amazon S3, ``skip_auto_headers={'Content-Type'}`` is required when calling
        :meth:`.BaseProvider.make_request()`: ``Content-Type`` is part of the "String To Sign",
        and the signed request would return ``HTTP 403 Forbidden`` with the error message
        ``SignatureDoesNotMatch`` if auto headers were not skipped.

        :param stream: the stream to post
        :type stream: :class:`.streams.BaseStream`
        :param path: the WaterButlerPath of the file to upload
        :type path: :class:`.WaterButlerPath`
        :param list args: additional args are ignored
        :param dict kwargs: additional kwargs are ignored
        :rtype: :class:`.GoogleCloudFileMetadata`
        :rtype: bool
        :raises: ``UploadError`` when the upload request fails or no ``ETag`` header comes back
        :raises: ``UploadChecksumMismatchError`` when the ``ETag`` and the stream MD5 disagree
        """

        already_there = await self.exists(path)
        created = not already_there

        md5_key = 'md5'
        stream.add_writer(md5_key, HashStreamWriter(hashlib.md5))

        method = 'PUT'
        name_in_bucket = utils.get_obj_name(path, is_folder=False)
        make_signed_url = functools.partial(self._build_and_sign_url, method, name_in_bucket)

        request_options = {
            'data': stream,
            'skip_auto_headers': {'Content-Type'},
            'headers': {'Content-Length': str(stream.size)},
            'expects': (HTTPStatus.OK, ),
            'throws': UploadError,
        }
        put_resp = await self.make_request(method, make_signed_url, **request_options)
        await put_resp.release()

        etag = put_resp.headers.get('etag', None)
        if not etag:
            raise UploadError('Missing response header "ETag" for upload.')

        checksum_matches = etag.strip('"') == stream.writers[md5_key].hexdigest
        if not checksum_matches:
            raise UploadChecksumMismatchError()

        return await self._metadata_object(path, is_folder=False), created  # type: ignore