def download(self, path, revision=None, range=None, **kwargs):
    """Fetch the contents of the file at ``path`` and return them as a stream.

    Generator-based coroutine (driven with ``yield from``).

    :param path: object exposing ``identifier`` and ``name`` for the target file
    :param revision: optional revision ID; ignored when it is empty or ends with
        ``settings.DRIVE_IGNORE_VERSION``
    :param range: forwarded unchanged to ``make_request`` as ``range``
    :param kwargs: accepted but not used by this method
    :return: a ``streams.ResponseStreamReader`` when the metadata carries ``fileSize``,
        otherwise a ``streams.StringStream`` holding the whole body
    """
    if not revision or revision.endswith(settings.DRIVE_IGNORE_VERSION):
        # No usable revision: the current file metadata tells us where to download from.
        data = yield from self.metadata(path, raw=True)
    else:
        # Must make additional request to look up download URL for revision
        revision_url = self.build_url("files", path.identifier, "revisions", revision, alt="json")
        response = yield from self.make_request(
            "GET",
            revision_url,
            expects=(200,),
            throws=exceptions.MetadataError,
        )
        data = yield from response.json()

    # Prefer a direct download URL; fall back to whatever export link drive_utils picks.
    source_url = data.get("downloadUrl") or drive_utils.get_export_link(data)
    download_resp = yield from self.make_request(
        "GET",
        source_url,
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )

    if "fileSize" in data:
        return streams.ResponseStreamReader(download_resp, size=data["fileSize"])

    # google docs, not drive files, have no way to get the file size
    # must buffer the entire file into memory
    body = yield from download_resp.read()
    stream = streams.StringStream(body)

    content_type = download_resp.headers.get("Content-Type")
    if content_type:
        stream.content_type = content_type

    if drive_utils.is_docs_file(data):
        stream.name = path.name + drive_utils.get_download_extension(data)

    return stream
async def download(self, path, revision=None, range=None, **kwargs):
    """Fetch the contents of the file at ``path`` and return them as a stream.

    :param path: object exposing ``identifier`` and ``name`` for the target file
    :param revision: optional revision ID; ignored when it is empty or ends with
        ``settings.DRIVE_IGNORE_VERSION``
    :param range: forwarded unchanged to ``make_request`` as ``range``
    :param kwargs: accepted but not used by this method
    :return: a ``streams.ResponseStreamReader`` when the metadata carries ``fileSize``,
        otherwise a ``streams.StringStream`` holding the whole body
    """
    if not revision or revision.endswith(settings.DRIVE_IGNORE_VERSION):
        # No usable revision: the current file metadata tells us where to download from.
        data = await self.metadata(path, raw=True)
    else:
        # Must make additional request to look up download URL for revision
        revision_url = self.build_url('files', path.identifier, 'revisions', revision, alt='json')
        async with self.request(
            'GET',
            revision_url,
            expects=(200, ),
            throws=exceptions.MetadataError,
        ) as response:
            data = await response.json()

    # Prefer a direct download URL; fall back to whatever export link drive_utils picks.
    source_url = data.get('downloadUrl') or drive_utils.get_export_link(data)
    download_resp = await self.make_request(
        'GET',
        source_url,
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )

    if 'fileSize' in data:
        return streams.ResponseStreamReader(download_resp, size=data['fileSize'])

    # google docs, not drive files, have no way to get the file size
    # must buffer the entire file into memory
    body = await download_resp.read()
    stream = streams.StringStream(body)

    content_type = download_resp.headers.get('Content-Type')
    if content_type:
        stream.content_type = content_type

    if drive_utils.is_docs_file(data):
        stream.name = path.name + drive_utils.get_download_extension(data)

    return stream
def _file_metadata(self, path, revision=None, raw=False):
    """Request metadata for a single file, optionally for one of its revisions.

    Generator-based coroutine (driven with ``yield from``).

    :param path: object exposing ``identifier`` for the target file
    :param revision: optional revision ID; when truthy the revisions endpoint is queried
    :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``; not
        consulted when a revision is requested
    :return: ``GoogleDriveFileRevisionMetadata`` when ``revision`` is given, otherwise the
        result of ``_handle_docs_versioning`` (docs files) or ``_serialize_item``
    """
    # /files/<id> for the file itself, /files/<id>/revisions/<rev> for a specific revision.
    segments = ['files', path.identifier]
    if revision:
        segments.extend(['revisions', revision])
    url = self.build_url(*segments)

    resp = yield from self.make_request(
        'GET',
        url,
        expects=(200, ),
        throws=exceptions.MetadataError,
    )
    data = yield from resp.json()

    if revision:
        return GoogleDriveFileRevisionMetadata(data, path)

    if not drive_utils.is_docs_file(data):
        return self._serialize_item(path, data, raw=raw)

    return (yield from self._handle_docs_versioning(path, data, raw=raw))
def name(self):
    """Return the item's title, adding an extension for docs files that lack one.

    The title comes from ``self.raw['title']``.  Only when ``utils.is_docs_file`` accepts
    the raw metadata *and* the title has no extension is the value of
    ``utils.get_extension(self.raw['exportLinks'])`` appended.
    """
    title = self.raw['title']
    has_extension = bool(os.path.splitext(title)[1])

    # Non-docs files, and titles that already carry an extension, are returned untouched.
    if not utils.is_docs_file(self.raw) or has_extension:
        return title

    return title + utils.get_extension(self.raw['exportLinks'])
def metadata(self, path, original_path=None, folder_id=None, raw=False, **kwargs):
    """Resolve ``path`` one segment at a time and return metadata for what it names.

    Generator-based coroutine (driven with ``yield from``).  Each call queries the files
    inside ``folder_id`` (filtered by the next path segment unless listing a leaf
    folder).  If the path has further segments, the method recurses into the first match.

    :param path: path string, wrapped in ``GoogleDrivePath`` together with the root
        folder name
    :param original_path: the path of the outermost call; defaults to the wrapped ``path``
    :param folder_id: ID of the folder to search; defaults to ``self.folder['id']``
    :param raw: forwarded to ``_serialize_item``
    :param kwargs: forwarded to the recursive call
    :return: a list of serialized items for a folder, or one serialized item for a file
    :raises exceptions.MetadataError: with a 404 code when the lookup of a file or of an
        intermediate segment returns no items
    """
    path = GoogleDrivePath(self.folder['name'], path)
    original_path = original_path or path
    folder_id = folder_id or self.folder['id']
    child = path.child

    # A leaf folder is listed in full; anything else is looked up by its next segment.
    if path.is_leaf and path.is_dir:
        title = None
    else:
        title = path.parts[1]

    query = self._build_query(folder_id, title=title)
    listing_url = self.build_url('files', q=query, alt='json')
    resp = yield from self.make_request(
        'GET',
        listing_url,
        expects=(200, ),
        throws=exceptions.MetadataError,
    )
    data = yield from resp.json()
    items = data['items']

    # Raise 404 on empty results if file or partial lookup
    if not items and (path.is_file or not path.is_leaf):
        raise exceptions.MetadataError('{} not found'.format(str(path)), code=http.client.NOT_FOUND)

    if not path.is_leaf:
        # More segments remain: descend into the first matching entry.
        child_id = items[0]['id']
        return (yield from self.metadata(
            str(child),
            original_path=original_path,
            folder_id=child_id,
            raw=raw,
            **kwargs
        ))

    if path.is_dir:
        listing = []
        for item in items:
            listing.append(self._serialize_item(original_path, item, raw=raw))
        return listing

    entry = items[0]

    # The "version" key does not correspond to revision IDs for Google Docs
    # files; make an extra request to the revisions endpoint to fetch the
    # true ID of the latest revision
    if drive_utils.is_docs_file(entry):
        revisions_response = yield from self.make_request(
            'GET',
            self.build_url('files', entry['id'], 'revisions'),
            expects=(200, ),
            throws=exceptions.RevisionsError,
        )
        revisions_data = yield from revisions_response.json()
        revisions = revisions_data['items']

        if revisions:
            entry['version'] = revisions[-1]['id']
        else:
            # Revisions are not available for some sharing configurations. If
            # revisions list is empty, use the etag of the file plus a sentinel
            # string as a dummy revision ID.
            entry['version'] = revisions_data['etag'] + settings.DRIVE_IGNORE_VERSION

    return self._serialize_item(original_path.parent, entry, raw=raw)
def _file_metadata(self, path, raw=False):
    """Request metadata for the file identified by ``path.identifier``.

    Generator-based coroutine (driven with ``yield from``).

    :param path: object exposing ``identifier`` for the target file
    :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``
    :return: the result of ``_handle_docs_versioning`` for docs files, otherwise the
        result of ``_serialize_item``
    """
    file_url = self.build_url('files', path.identifier)
    resp = yield from self.make_request(
        'GET',
        file_url,
        expects=(200, ),
        throws=exceptions.MetadataError,
    )
    data = yield from resp.json()

    if not drive_utils.is_docs_file(data):
        return self._serialize_item(path, data, raw=raw)

    # Docs files get their version information resolved by a dedicated helper.
    return (yield from self._handle_docs_versioning(path, data, raw=raw))
def _file_metadata(self, path, revision=None, raw=False):
    """Request metadata for a single file, optionally for one of its revisions.

    Generator-based coroutine (driven with ``yield from``).

    :param path: object exposing ``identifier`` for the target file
    :param revision: optional revision ID; when truthy the revisions endpoint is queried
    :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``; not
        consulted when a revision is requested
    :return: ``GoogleDriveFileRevisionMetadata`` when ``revision`` is given, otherwise the
        result of ``_handle_docs_versioning`` (docs files) or ``_serialize_item``
    """
    # The revision, when present, is addressed as a sub-resource of the file.
    url_parts = ("files", path.identifier, "revisions", revision) if revision else ("files", path.identifier)
    url = self.build_url(*url_parts)

    resp = yield from self.make_request("GET", url, expects=(200,), throws=exceptions.MetadataError)
    data = yield from resp.json()

    if revision:
        return GoogleDriveFileRevisionMetadata(data, path)

    if not drive_utils.is_docs_file(data):
        return self._serialize_item(path, data, raw=raw)

    return (yield from self._handle_docs_versioning(path, data, raw=raw))
async def _file_metadata(self, path, revision=None, raw=False):
    """Request metadata for a single file, optionally for one of its revisions.

    :param path: object exposing ``identifier`` for the target file
    :param revision: optional revision ID; when truthy the revisions endpoint is queried
    :param raw: forwarded to ``_handle_docs_versioning`` / ``_serialize_item``; not
        consulted when a revision is requested
    :return: ``GoogleDriveFileRevisionMetadata`` when ``revision`` is given, otherwise the
        result of ``_handle_docs_versioning`` (docs files) or ``_serialize_item``
    """
    # /files/<id> for the file itself, /files/<id>/revisions/<rev> for a specific revision.
    segments = ['files', path.identifier]
    if revision:
        segments.extend(['revisions', revision])
    url = self.build_url(*segments)

    async with self.request(
        'GET',
        url,
        expects=(200, ),
        throws=exceptions.MetadataError,
    ) as resp:
        data = await resp.json()

    if revision:
        return GoogleDriveFileRevisionMetadata(data, path)

    if not drive_utils.is_docs_file(data):
        return self._serialize_item(path, data, raw=raw)

    return await self._handle_docs_versioning(path, data, raw=raw)
async def _file_metadata(self, path: GoogleDrivePath, revision: str=None, raw: bool=False):
    """ Returns metadata for the file identified by `path`.  If the `revision` arg is set,
    will attempt to return metadata for the given revision of the file.  If the revision does
    not exist, ``_file_metadata`` will throw a 404.

    This method used to error with a 500 when metadata was requested for a file that the
    authorizing user only had view or commenting permissions for.  The GDrive revisions
    endpoint returns a 403, which was not being handled.  WB appends a sentinel value to the
    revisions for these files.  If a revision ending with this sentinel value is detected, this
    method will return metadata for the latest revision of the file.  If a revision NOT ending
    in the sentinel value is requested for a read-only file, this method will return a 404 Not
    Found instead.

    Metrics:

    ``_file_metadata.got_revision``: did this request include a revision parameter?

    ``_file_metadata.revision_is_valid``: if a revision was given, was it valid? A revision is
    "valid" if it doesn't end with our sentinel string (`settings.DRIVE_IGNORE_VERSION`).

    ``_file_metadata.user_role``: What role did the user possess? Helps identify other roles
    for which revision information isn't available.

    :param GoogleDrivePath path: the path of the file whose metadata is being requested
    :param str revision: a string representing the ID of the revision (default: `None`)
    :param bool raw: should we return the raw response object from the GDrive API?
    :rtype: GoogleDriveFileRevisionMetadata when a valid revision is requested; otherwise
        dict (``raw=True``) or GoogleDriveFileMetadata, or whatever
        ``_handle_docs_versioning`` returns for docs files whose revisions are accessible
    :return: metadata for the file or revision, or the raw response object from the GDrive API
    :raises exceptions.NotFoundError: if the API answers with anything other than a 200
    """
    self.metrics.add('_file_metadata.got_revision', revision is not None)

    # A revision carrying the sentinel suffix is a placeholder ID, so it is not looked up.
    valid_revision = bool(revision) and not revision.endswith(settings.DRIVE_IGNORE_VERSION)
    if revision:
        self.metrics.add('_file_metadata.revision_is_valid', valid_revision)

    if valid_revision:
        url = self.build_url('files', path.identifier, 'revisions', revision)
    else:
        url = self.build_url('files', path.identifier)

    async with self.request(
        'GET', url,
        expects=(200, 403, 404, ),
        throws=exceptions.MetadataError,
    ) as resp:
        try:
            data = await resp.json()
        except Exception:
            # Some 404s return a string instead of json.  Catch ``Exception`` rather than
            # using a bare ``except`` so task cancellation and interpreter-exit signals
            # are not swallowed here.
            data = await resp.read()

    # 403 and 404 are both surfaced to the caller as "not found".
    if resp.status != 200:
        raise exceptions.NotFoundError(path)

    if valid_revision:
        return GoogleDriveFileRevisionMetadata(data, path)

    user_role = data['userPermission']['role']
    self.metrics.add('_file_metadata.user_role', user_role)
    can_access_revisions = user_role in self.ROLES_ALLOWING_REVISIONS
    if drive_utils.is_docs_file(data):
        if can_access_revisions:
            return await self._handle_docs_versioning(path, data, raw=raw)
        else:
            # Revisions are not available for some sharing configurations. If revisions list is
            # empty, use the etag of the file plus a sentinel string as a dummy revision ID.
            data['version'] = data['etag'] + settings.DRIVE_IGNORE_VERSION

    return data if raw else GoogleDriveFileMetadata(data, path)
def extra(self):
    """Return provider-specific extras for this item.

    Docs files (per ``utils.is_docs_file``) report ``downloadExt``; everything else
    reports ``md5`` taken from the raw ``md5Checksum`` field.
    """
    if not utils.is_docs_file(self.raw):
        checksum = self.raw['md5Checksum']
        return {'md5': checksum}

    download_ext = utils.get_download_extension(self.raw)
    return {'downloadExt': download_ext}
def extra(self):
    """Return the parent class's extras, adding ``downloadExt`` for docs files.

    The extension is derived from the raw ``exportLinks`` entry via
    ``utils.get_download_extension``.
    """
    details = super().extra

    # Only docs files have an export extension to advertise.
    if not utils.is_docs_file(self.raw):
        return details

    details['downloadExt'] = utils.get_download_extension(self.raw['exportLinks'])
    return details
def is_google_doc(self):
    """Report whether ``utils.is_docs_file`` returned something other than ``None``.

    NOTE(review): any non-``None`` result counts as True, including falsy values such as
    ``False`` or an empty dict -- confirm this matches what ``is_docs_file`` returns.
    """
    docs_marker = utils.is_docs_file(self.raw)
    return docs_marker is not None
def name(self):
    """Return the raw ``title``, with an extension appended for docs files.

    The extension is whatever ``utils.get_extension`` yields for the raw metadata.
    """
    title = self.raw['title']

    if not utils.is_docs_file(self.raw):
        return title

    return title + utils.get_extension(self.raw)
def extra(self):
    """Return the parent class's extras plus Google Drive specific fields.

    ``downloadExt`` is added for docs files only; ``webView`` is always set, to the raw
    ``alternateLink`` value (``None`` when that key is absent).
    """
    details = super().extra

    if utils.is_docs_file(self.raw):
        download_ext = utils.get_download_extension(self.raw)
        details['downloadExt'] = download_ext

    web_view = self.raw.get('alternateLink')
    details['webView'] = web_view
    return details
def name(self):
    """Return the revision's file name, with an extension appended for docs files.

    Uses the raw ``originalFilename`` when present and falls back to ``self._path.name``.
    The extension is whatever ``utils.get_extension`` yields for the raw metadata.
    """
    title = self.raw.get('originalFilename', self._path.name)

    if not utils.is_docs_file(self.raw):
        return title

    return title + utils.get_extension(self.raw)
async def _file_metadata(self,
                         path: GoogleDrivePath,
                         revision: str=None,
                         raw: bool=False) -> Union[dict, BaseGoogleDriveMetadata]:
    """ Returns metadata for the file identified by `path`.  If the `revision` arg is set,
    will attempt to return metadata for the given revision of the file.  If the revision does
    not exist, ``_file_metadata`` will throw a 404.

    This method used to error with a 500 when metadata was requested for a file that the
    authorizing user only had view or commenting permissions for.  The GDrive revisions
    endpoint returns a 403, which was not being handled.  WB appends a sentinel value to the
    revisions for these files.  If a revision ending with this sentinel value is detected, this
    method will return metadata for the latest revision of the file.  If a revision NOT ending
    in the sentinel value is requested for a read-only file, this method will return a 404 Not
    Found instead.

    Metrics:

    ``_file_metadata.got_revision``: did this request include a revision parameter?

    ``_file_metadata.revision_is_valid``: if a revision was given, was it valid? A revision is
    "valid" if it doesn't end with our sentinel string (`settings.DRIVE_IGNORE_VERSION`).

    ``_file_metadata.user_role``: What role did the user possess? Helps identify other roles
    for which revision information isn't available.

    :param GoogleDrivePath path: the path of the file whose metadata is being requested
    :param str revision: a string representing the ID of the revision (default: `None`)
    :param bool raw: should we return the raw response object from the GDrive API?
    :rtype: GoogleDriveFileRevisionMetadata when a valid revision is requested; otherwise
        dict (``raw=True``) or GoogleDriveFileMetadata, or whatever
        ``_handle_docs_versioning`` returns for docs files whose revisions are accessible
    :return: metadata for the file or revision, or the raw response object from the GDrive API
    :raises exceptions.NotFoundError: if the API answers with anything other than a 200
    """
    self.metrics.add('_file_metadata.got_revision', revision is not None)

    # A revision carrying the sentinel suffix is a placeholder ID, so it is not looked up.
    valid_revision = bool(revision) and not revision.endswith(pd_settings.DRIVE_IGNORE_VERSION)
    if revision:
        self.metrics.add('_file_metadata.revision_is_valid', valid_revision)

    if valid_revision:
        url = self.build_url('files', path.identifier, 'revisions', revision)
    else:
        url = self.build_url('files', path.identifier)

    async with self.request(
        'GET', url,
        expects=(200, 403, 404, ),
        throws=exceptions.MetadataError,
    ) as resp:
        try:
            data = await resp.json()
        except Exception:
            # Some 404s return a string instead of json.  Catch ``Exception`` rather than
            # using a bare ``except`` so task cancellation and interpreter-exit signals
            # are not swallowed here.
            data = await resp.read()

    # 403 and 404 are both surfaced to the caller as "not found".
    if resp.status != 200:
        raise exceptions.NotFoundError(path)

    if valid_revision:
        return GoogleDriveFileRevisionMetadata(data, path)

    user_role = data['userPermission']['role']
    self.metrics.add('_file_metadata.user_role', user_role)
    can_access_revisions = user_role in self.ROLES_ALLOWING_REVISIONS
    if utils.is_docs_file(data):
        if can_access_revisions:
            return await self._handle_docs_versioning(path, data, raw=raw)
        else:
            # Revisions are not available for some sharing configurations. If revisions list is
            # empty, use the etag of the file plus a sentinel string as a dummy revision ID.
            data['version'] = data['etag'] + pd_settings.DRIVE_IGNORE_VERSION

    return data if raw else GoogleDriveFileMetadata(data, path)