예제 #1
0
    def download(self, path, revision=None, range=None, **kwargs):
        """Download the content behind ``path`` and return it as a stream.

        Written as a generator-based coroutine (``yield from``).

        :param path: path object; ``path.identifier`` and ``path.name`` are read
        :param revision: optional revision ID. It is only looked up when it is truthy
            and does not end with ``settings.DRIVE_IGNORE_VERSION``
        :param range: passed through unchanged to the download request
        :param kwargs: accepted but not used here
        :return: a ``streams.ResponseStreamReader`` when the metadata has a ``fileSize``
            key, otherwise a ``streams.StringStream`` holding the whole body
        """
        if not revision or revision.endswith(settings.DRIVE_IGNORE_VERSION):
            data = yield from self.metadata(path, raw=True)
        else:
            # A specific revision needs its own lookup to learn the download URL
            revision_url = self.build_url("files", path.identifier, "revisions", revision, alt="json")
            response = yield from self.make_request(
                "GET",
                revision_url,
                expects=(200,),
                throws=exceptions.MetadataError,
            )
            data = yield from response.json()

        source_url = data.get("downloadUrl") or drive_utils.get_export_link(data)
        download_resp = yield from self.make_request(
            "GET",
            source_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        if "fileSize" in data:
            return streams.ResponseStreamReader(download_resp, size=data["fileSize"])

        # No size is available for this file (the Google Docs case), so the
        # entire body is read into memory before being wrapped in a stream.
        body = yield from download_resp.read()
        stream = streams.StringStream(body)

        content_type = download_resp.headers.get("Content-Type")
        if content_type:
            stream.content_type = download_resp.headers["Content-Type"]

        if drive_utils.is_docs_file(data):
            stream.name = path.name + drive_utils.get_download_extension(data)
        return stream
예제 #2
0
    async def download(self, path, revision=None, range=None, **kwargs):
        """Download the content behind ``path`` and return it as a stream.

        :param path: path object; ``path.identifier`` and ``path.name`` are read
        :param revision: optional revision ID. It is only looked up when it is truthy
            and does not end with ``settings.DRIVE_IGNORE_VERSION``
        :param range: passed through unchanged to the download request
        :param kwargs: accepted but not used here
        :return: a ``streams.ResponseStreamReader`` when the metadata has a ``fileSize``
            key, otherwise a ``streams.StringStream`` holding the whole body
        """
        use_current = not revision or revision.endswith(settings.DRIVE_IGNORE_VERSION)
        if use_current:
            data = await self.metadata(path, raw=True)
        else:
            # A specific revision needs its own lookup to learn the download URL
            revision_url = self.build_url('files', path.identifier, 'revisions', revision, alt='json')
            async with self.request(
                'GET',
                revision_url,
                expects=(200, ),
                throws=exceptions.MetadataError,
            ) as response:
                data = await response.json()

        source_url = data.get('downloadUrl') or drive_utils.get_export_link(data)
        download_resp = await self.make_request(
            'GET',
            source_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        if 'fileSize' in data:
            return streams.ResponseStreamReader(download_resp, size=data['fileSize'])

        # No size is available for this file (the Google Docs case), so the
        # entire body is read into memory before being wrapped in a stream.
        body = await download_resp.read()
        stream = streams.StringStream(body)

        if download_resp.headers.get('Content-Type'):
            stream.content_type = download_resp.headers['Content-Type']

        if drive_utils.is_docs_file(data):
            extension = drive_utils.get_download_extension(data)
            stream.name = path.name + extension
        return stream
예제 #3
0
    def _file_metadata(self, path, revision=None, raw=False):
        """Fetch metadata for the file at ``path``, optionally for one revision.

        Written as a generator-based coroutine (``yield from``).

        :param path: path object; ``path.identifier`` is used to build the URL
        :param revision: when truthy, the revision endpoint is queried and a
            ``GoogleDriveFileRevisionMetadata`` is returned (``raw`` is not consulted)
        :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
        :return: revision metadata, the result of ``_handle_docs_versioning`` for
            docs files, or the result of ``_serialize_item`` otherwise
        """
        url_segments = ['files', path.identifier]
        if revision:
            url_segments += ['revisions', revision]
        url = self.build_url(*url_segments)

        response = yield from self.make_request(
            'GET',
            url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        data = yield from response.json()

        if revision:
            return GoogleDriveFileRevisionMetadata(data, path)

        if not drive_utils.is_docs_file(data):
            return self._serialize_item(path, data, raw=raw)

        versioned = yield from self._handle_docs_versioning(path, data, raw=raw)
        return versioned
예제 #4
0
 def name(self):
     """Return the item's title, with an export extension added when needed.

     The extension from ``utils.get_extension(self.raw['exportLinks'])`` is
     appended only when the item is a docs file and the title has no
     extension of its own.
     """
     title = self.raw['title']
     existing_ext = os.path.splitext(title)[1]
     if not utils.is_docs_file(self.raw) or existing_ext:
         return title
     return title + utils.get_extension(self.raw['exportLinks'])
예제 #5
0
    async def download(self, path, revision=None, range=None, **kwargs):
        """Return a stream with the content of the file at ``path``.

        :param path: path object; ``path.identifier`` and ``path.name`` are read
        :param revision: optional revision ID; skipped when falsy or when it ends
            with ``settings.DRIVE_IGNORE_VERSION``
        :param range: handed to the download request as-is
        :param kwargs: accepted but not used here
        :return: ``streams.ResponseStreamReader`` if the metadata contains ``fileSize``,
            else a ``streams.StringStream`` built from the fully-read response
        """
        needs_revision_lookup = bool(revision) and not revision.endswith(settings.DRIVE_IGNORE_VERSION)

        if needs_revision_lookup:
            # The revision's download URL has to be fetched with a separate request
            async with self.request(
                'GET',
                self.build_url('files', path.identifier, 'revisions', revision, alt='json'),
                expects=(200, ),
                throws=exceptions.MetadataError,
            ) as revision_resp:
                data = await revision_resp.json()
        else:
            data = await self.metadata(path, raw=True)

        download_url = data.get('downloadUrl') or drive_utils.get_export_link(data)
        download_resp = await self.make_request(
            'GET',
            download_url,
            range=range,
            expects=(200, 206),
            throws=exceptions.DownloadError,
        )

        if 'fileSize' not in data:
            # Without a size (the Google Docs case) the whole file is buffered in memory
            payload = await download_resp.read()
            stream = streams.StringStream(payload)
            if download_resp.headers.get('Content-Type'):
                stream.content_type = download_resp.headers['Content-Type']
            if drive_utils.is_docs_file(data):
                stream.name = path.name + drive_utils.get_download_extension(data)
            return stream

        return streams.ResponseStreamReader(download_resp, size=data['fileSize'])
예제 #6
0
 def name(self):
     """Return the title from ``self.raw``, extended for extension-less docs files.

     If the item is a docs file and its title has no extension, the value of
     ``utils.get_extension(self.raw['exportLinks'])`` is appended.
     """
     title = self.raw['title']
     _, current_ext = os.path.splitext(title)
     needs_ext = utils.is_docs_file(self.raw) and not current_ext
     if needs_ext:
         return title + utils.get_extension(self.raw['exportLinks'])
     return title
예제 #7
0
    def metadata(self, path, original_path=None, folder_id=None, raw=False, **kwargs):
        """Resolve ``path`` one segment at a time and return its metadata.

        Written as a generator-based coroutine (``yield from``). Each call looks
        up one path segment inside ``folder_id`` and recurses into the child
        path until the leaf is reached.

        :param path: path string, wrapped in ``GoogleDrivePath`` together with
            ``self.folder['name']``
        :param original_path: the path from the outermost call; defaults to the
            wrapped ``path``
        :param folder_id: ID of the folder to search; defaults to ``self.folder['id']``
        :param raw: forwarded to ``_serialize_item``
        :param kwargs: forwarded to the recursive call
        :return: a list of serialized items for a directory leaf, otherwise a
            single serialized item
        :raises exceptions.MetadataError: with code ``http.client.NOT_FOUND`` when
            the lookup is empty for a file or a non-leaf segment
        """
        path = GoogleDrivePath(self.folder['name'], path)
        if not original_path:
            original_path = path
        if not folder_id:
            folder_id = self.folder['id']
        child = path.child

        if path.is_leaf and path.is_dir:
            title = None
        else:
            title = path.parts[1]
        query = self._build_query(folder_id, title=title)

        listing_url = self.build_url('files', q=query, alt='json')
        resp = yield from self.make_request(
            'GET',
            listing_url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        data = yield from resp.json()
        items = data['items']

        # Raise 404 on empty results if file or partial lookup
        if not items and (path.is_file or not path.is_leaf):
            raise exceptions.MetadataError('{} not found'.format(str(path)), code=http.client.NOT_FOUND)

        if not path.is_leaf:
            # Descend into the first match and continue with the remaining path
            child_id = items[0]['id']
            nested = yield from self.metadata(
                str(child),
                original_path=original_path,
                folder_id=child_id,
                raw=raw,
                **kwargs
            )
            return nested

        if path.is_dir:
            return [self._serialize_item(original_path, entry, raw=raw) for entry in items]

        item = items[0]

        # The "version" key does not correspond to revision IDs for Google Docs
        # files; make an extra request to the revisions endpoint to fetch the
        # true ID of the latest revision
        if drive_utils.is_docs_file(item):
            revisions_response = yield from self.make_request(
                'GET',
                self.build_url('files', item['id'], 'revisions'),
                expects=(200, ),
                throws=exceptions.RevisionsError,
            )
            revisions_data = yield from revisions_response.json()
            revisions = revisions_data['items']

            if revisions:
                item['version'] = revisions[-1]['id']
            else:
                # Revisions are not available for some sharing configurations.
                # With an empty list, the etag plus a sentinel string is used
                # as a dummy revision ID.
                item['version'] = revisions_data['etag'] + settings.DRIVE_IGNORE_VERSION

        return self._serialize_item(original_path.parent, item, raw=raw)
예제 #8
0
    def _file_metadata(self, path, raw=False):
        """Fetch metadata for the file at ``path``.

        Written as a generator-based coroutine (``yield from``).

        :param path: path object; ``path.identifier`` is used to build the URL
        :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
        :return: the result of ``_handle_docs_versioning`` for docs files,
            otherwise the result of ``_serialize_item``
        """
        file_url = self.build_url('files', path.identifier)
        response = yield from self.make_request(
            'GET',
            file_url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        data = yield from response.json()

        if not drive_utils.is_docs_file(data):
            return self._serialize_item(path, data, raw=raw)

        versioned = yield from self._handle_docs_versioning(path, data, raw=raw)
        return versioned
예제 #9
0
    def _file_metadata(self, path, raw=False):
        """Request the file resource for ``path`` and turn it into metadata.

        Written as a generator-based coroutine (``yield from``).

        :param path: path object; ``path.identifier`` selects the file
        :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
        :return: the value produced by ``_handle_docs_versioning`` when the data
            is a docs file, otherwise the value produced by ``_serialize_item``
        """
        resp = yield from self.make_request(
            'GET', self.build_url('files', path.identifier),
            expects=(200, ), throws=exceptions.MetadataError,
        )
        data = yield from resp.json()

        is_docs = drive_utils.is_docs_file(data)
        if is_docs:
            # Docs files go through the separate versioning helper
            return (yield from self._handle_docs_versioning(path, data, raw=raw))
        return self._serialize_item(path, data, raw=raw)
예제 #10
0
    def _file_metadata(self, path, revision=None, raw=False):
        """Fetch metadata for the file at ``path``, optionally for one revision.

        Written as a generator-based coroutine (``yield from``).

        :param path: path object; ``path.identifier`` is used to build the URL
        :param revision: when truthy, the revision endpoint is queried and a
            ``GoogleDriveFileRevisionMetadata`` is returned (``raw`` is not consulted)
        :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
        :return: revision metadata, the result of ``_handle_docs_versioning`` for
            docs files, or the result of ``_serialize_item`` otherwise
        """
        revision_segments = ("revisions", revision) if revision else ()
        url = self.build_url("files", path.identifier, *revision_segments)

        resp = yield from self.make_request("GET", url, expects=(200,), throws=exceptions.MetadataError)
        data = yield from resp.json()

        if revision:
            return GoogleDriveFileRevisionMetadata(data, path)
        if not drive_utils.is_docs_file(data):
            return self._serialize_item(path, data, raw=raw)
        return (yield from self._handle_docs_versioning(path, data, raw=raw))
예제 #11
0
    async def _file_metadata(self, path, revision=None, raw=False):
        """Fetch metadata for the file at ``path``, optionally for one revision.

        :param path: path object; ``path.identifier`` is used to build the URL
        :param revision: when truthy, the revision endpoint is queried and a
            ``GoogleDriveFileRevisionMetadata`` is returned (``raw`` is not consulted)
        :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
        :return: revision metadata, the result of ``_handle_docs_versioning`` for
            docs files, or the result of ``_serialize_item`` otherwise
        """
        segments = ['files', path.identifier]
        if revision:
            segments.extend(['revisions', revision])
        url = self.build_url(*segments)

        async with self.request(
            'GET', url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as response:
            data = await response.json()

        if revision:
            return GoogleDriveFileRevisionMetadata(data, path)

        if not drive_utils.is_docs_file(data):
            return self._serialize_item(path, data, raw=raw)

        versioned = await self._handle_docs_versioning(path, data, raw=raw)
        return versioned
예제 #12
0
    async def _file_metadata(self,
                             path: GoogleDrivePath,
                             revision: str=None,
                             raw: bool=False):
        """ Returns metadata for the file identified by `path`.  If the `revision` arg is set,
        will attempt to return metadata for the given revision of the file.  If the revision does
        not exist, ``_file_metadata`` will throw a 404.

        This method used to error with a 500 when metadata was requested for a file that the
        authorizing user only had view or commenting permissions for.  The GDrive revisions
        endpoint returns a 403, which was not being handled.  WB appends a sentinel value to the
        revisions for these files.  If a revision ending with this sentinel value is detected, this
        method will return metadata for the latest revision of the file.  If a revision NOT ending
        in the sentinel value is requested for a read-only file, this method will return a 404 Not
        Found instead.

        Metrics:

        ``_file_metadata.got_revision``: did this request include a revision parameter?

        ``_file_metadata.revision_is_valid``: if a revision was given, was it valid? A revision is
        "valid" if it doesn't end with our sentinel string (`settings.DRIVE_IGNORE_VERSION`).

        ``_file_metadata.user_role``: What role did the user possess? Helps identify other roles
        for which revision information isn't available.

        :param GoogleDrivePath path: the path of the file whose metadata is being requested
        :param str revision: a string representing the ID of the revision (default: `None`)
        :param bool raw: should we return the raw response object from the GDrive API?
        :rtype: GoogleDriveFileMetadata or dict
        :return: a metadata for the googledoc or the raw response object from the GDrive API
        :raises exceptions.NotFoundError: if the response status is anything other than 200
        """

        self.metrics.add('_file_metadata.got_revision', revision is not None)

        valid_revision = revision and not revision.endswith(settings.DRIVE_IGNORE_VERSION)
        if revision:
            self.metrics.add('_file_metadata.revision_is_valid', valid_revision)

        # ``valid_revision`` can only be truthy when ``revision`` itself is truthy.
        if valid_revision:
            url = self.build_url('files', path.identifier, 'revisions', revision)
        else:
            url = self.build_url('files', path.identifier)

        async with self.request(
            'GET', url,
            expects=(200, 403, 404, ),
            throws=exceptions.MetadataError,
        ) as resp:
            try:
                data = await resp.json()
            except Exception:  # some 404s return a string instead of json
                # Deliberately not a bare ``except``: task cancellation, KeyboardInterrupt and
                # SystemExit must propagate instead of triggering a second read.
                data = await resp.read()

        # 403 and 404 are both reported to the caller as "not found".
        if resp.status != 200:
            raise exceptions.NotFoundError(path)

        if valid_revision:
            return GoogleDriveFileRevisionMetadata(data, path)

        user_role = data['userPermission']['role']
        self.metrics.add('_file_metadata.user_role', user_role)
        can_access_revisions = user_role in self.ROLES_ALLOWING_REVISIONS
        if drive_utils.is_docs_file(data):
            if can_access_revisions:
                return await self._handle_docs_versioning(path, data, raw=raw)
            else:
                # Revisions are not available for some sharing configurations. If the user's role
                # does not allow revisions, use the etag of the file plus a sentinel string as a
                # dummy revision ID.
                data['version'] = data['etag'] + settings.DRIVE_IGNORE_VERSION

        return data if raw else GoogleDriveFileMetadata(data, path)
예제 #13
0
 def extra(self):
     """Return provider-specific extra fields for this item.

     Docs files get ``{'downloadExt': ...}`` from
     ``utils.get_download_extension``; everything else gets ``{'md5': ...}``
     taken from the ``md5Checksum`` key of ``self.raw``.
     """
     if not utils.is_docs_file(self.raw):
         return {'md5': self.raw['md5Checksum']}
     download_ext = utils.get_download_extension(self.raw)
     return {'downloadExt': download_ext}
예제 #14
0
 def extra(self):
     """Extend the parent's ``extra`` with ``downloadExt`` for docs files.

     The extension comes from
     ``utils.get_download_extension(self.raw['exportLinks'])``. The object
     obtained from ``super().extra`` is modified and returned.
     """
     details = super().extra
     if not utils.is_docs_file(self.raw):
         return details
     details['downloadExt'] = utils.get_download_extension(self.raw['exportLinks'])
     return details
예제 #15
0
 def is_google_doc(self):
     """Return True when ``utils.is_docs_file(self.raw)`` is anything but ``None``."""
     docs_marker = utils.is_docs_file(self.raw)
     return docs_marker is not None
예제 #16
0
 def name(self):
     """Return the item's title, with an extension appended for docs files.

     The extension is whatever ``utils.get_extension(self.raw)`` returns.
     """
     title = self.raw['title']
     if not utils.is_docs_file(self.raw):
         return title
     return title + utils.get_extension(self.raw)
예제 #17
0
 def extra(self):
     """Return the parent's ``extra``, plus ``downloadExt`` when this is a docs file.

     ``downloadExt`` is computed from ``self.raw['exportLinks']`` via
     ``utils.get_download_extension``. The object from ``super().extra`` is
     updated in place.
     """
     extra_fields = super().extra
     is_docs = utils.is_docs_file(self.raw)
     if is_docs:
         export_links = self.raw['exportLinks']
         extra_fields['downloadExt'] = utils.get_download_extension(export_links)
     return extra_fields
예제 #18
0
 def extra(self):
     """Return the parent's ``extra`` enriched with Drive-specific fields.

     ``downloadExt`` is added only for docs files. ``webView`` is always set
     from ``self.raw.get('alternateLink')``, so it is ``None`` when the key is
     absent.
     """
     details = super().extra
     is_docs = utils.is_docs_file(self.raw)
     if is_docs:
         details['downloadExt'] = utils.get_download_extension(self.raw)
     web_view = self.raw.get('alternateLink')
     details['webView'] = web_view
     return details
예제 #19
0
 def name(self):
     """Return the display name for this item.

     Uses ``originalFilename`` from ``self.raw`` when present, falling back to
     ``self._path.name``. Docs files additionally get the extension returned
     by ``utils.get_extension(self.raw)``.
     """
     fallback = self._path.name
     title = self.raw.get('originalFilename', fallback)
     if not utils.is_docs_file(self.raw):
         return title
     return title + utils.get_extension(self.raw)
예제 #20
0
 def name(self):
     """Return ``self.raw['title']``, suffixed with an extension for docs files.

     The suffix is the value of ``utils.get_extension(self.raw)``.
     """
     title = self.raw['title']
     is_docs = utils.is_docs_file(self.raw)
     if is_docs:
         suffix = utils.get_extension(self.raw)
         return title + suffix
     return title
예제 #21
0
 def extra(self):
     """Return the parent's ``extra`` with ``downloadExt`` (docs only) and ``webView``.

     ``webView`` is taken from the ``alternateLink`` key of ``self.raw`` and is
     ``None`` when that key is missing. The object from ``super().extra`` is
     updated in place.
     """
     extra_fields = super().extra
     if utils.is_docs_file(self.raw):
         download_ext = utils.get_download_extension(self.raw)
         extra_fields['downloadExt'] = download_ext
     extra_fields['webView'] = self.raw.get('alternateLink')
     return extra_fields
예제 #22
0
 def is_google_doc(self):
     """Report whether ``utils.is_docs_file`` returned a non-``None`` value for ``self.raw``."""
     result = utils.is_docs_file(self.raw)
     if result is None:
         return False
     return True
예제 #23
0
    async def _file_metadata(self,
                             path: GoogleDrivePath,
                             revision: str=None,
                             raw: bool=False) -> Union[dict, BaseGoogleDriveMetadata]:
        """ Returns metadata for the file identified by `path`.  If the `revision` arg is set,
        will attempt to return metadata for the given revision of the file.  If the revision does
        not exist, ``_file_metadata`` will throw a 404.

        This method used to error with a 500 when metadata was requested for a file that the
        authorizing user only had view or commenting permissions for.  The GDrive revisions
        endpoint returns a 403, which was not being handled.  WB appends a sentinel value to the
        revisions for these files.  If a revision ending with this sentinel value is detected, this
        method will return metadata for the latest revision of the file.  If a revision NOT ending
        in the sentinel value is requested for a read-only file, this method will return a 404 Not
        Found instead.

        Metrics:

        ``_file_metadata.got_revision``: did this request include a revision parameter?

        ``_file_metadata.revision_is_valid``: if a revision was given, was it valid? A revision is
        "valid" if it doesn't end with our sentinel string (`pd_settings.DRIVE_IGNORE_VERSION`).

        ``_file_metadata.user_role``: What role did the user possess? Helps identify other roles
        for which revision information isn't available.

        :param GoogleDrivePath path: the path of the file whose metadata is being requested
        :param str revision: a string representing the ID of the revision (default: `None`)
        :param bool raw: should we return the raw response object from the GDrive API?
        :rtype: GoogleDriveFileMetadata or dict
        :return: a metadata for the googledoc or the raw response object from the GDrive API
        :raises exceptions.NotFoundError: if the response status is anything other than 200
        """

        self.metrics.add('_file_metadata.got_revision', revision is not None)

        valid_revision = revision and not revision.endswith(pd_settings.DRIVE_IGNORE_VERSION)
        if revision:
            self.metrics.add('_file_metadata.revision_is_valid', valid_revision)

        # ``valid_revision`` can only be truthy when ``revision`` itself is truthy.
        if valid_revision:
            url = self.build_url('files', path.identifier, 'revisions', revision)
        else:
            url = self.build_url('files', path.identifier)

        async with self.request(
            'GET', url,
            expects=(200, 403, 404, ),
            throws=exceptions.MetadataError,
        ) as resp:
            try:
                data = await resp.json()
            except Exception:  # some 404s return a string instead of json
                # Deliberately not a bare ``except``: task cancellation, KeyboardInterrupt and
                # SystemExit must propagate instead of triggering a second read.
                data = await resp.read()

        # 403 and 404 are both reported to the caller as "not found".
        if resp.status != 200:
            raise exceptions.NotFoundError(path)

        if valid_revision:
            return GoogleDriveFileRevisionMetadata(data, path)

        user_role = data['userPermission']['role']
        self.metrics.add('_file_metadata.user_role', user_role)
        can_access_revisions = user_role in self.ROLES_ALLOWING_REVISIONS
        if utils.is_docs_file(data):
            if can_access_revisions:
                return await self._handle_docs_versioning(path, data, raw=raw)
            else:
                # Revisions are not available for some sharing configurations. If the user's role
                # does not allow revisions, use the etag of the file plus a sentinel string as a
                # dummy revision ID.
                data['version'] = data['etag'] + pd_settings.DRIVE_IGNORE_VERSION

        return data if raw else GoogleDriveFileMetadata(data, path)