async def download(self,  # type: ignore
                   path: WaterButlerPath,
                   revision: str = None,
                   range: Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Stream the contents of the file at ``path``.

    :param path: path of the file to download; must have an identifier
    :param revision: optional version identifier to fetch
    :param range: optional byte range to request
    :raises: :class:`waterbutler.core.exceptions.DownloadError` when the path
        has no identifier (404) or the request fails
    """
    if path.identifier is None:
        raise exceptions.DownloadError('"{}" not found'.format(str(path)), code=404)

    # Only send a version param when a revision other than the file's own
    # identifier was requested.
    version_query = {}
    if revision and revision != path.identifier:
        version_query['version'] = revision

    logger.debug('request-range:: {}'.format(range))
    resp = await self.make_request(
        'GET',
        self.build_url('files', path.identifier, 'content', **version_query),
        headers={'Accept-Encoding': ''},
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )
    logger.debug('download-headers:: {}'.format([(x, resp.headers[x]) for x in resp.headers]))
    return streams.ResponseStreamReader(resp)
def download(self):
    """Fetch ``self.url`` and return the response wrapped in a stream reader.

    :raises: :class:`exceptions.ProviderError` if the server replies with any
        4xx/5xx status
    """
    resp = yield from aiohttp.request('GET', self.url)
    if resp.status >= 400:
        raise exceptions.ProviderError(
            'Unable to download the requested file, please try again later.',
            code=resp.status)
    return streams.ResponseStreamReader(resp)
async def download(self, path, revision=None, range=None, **kwargs):
    """Stream the Google Drive file at ``path``, optionally at ``revision``.

    Google Docs report no size, so they are buffered fully into memory and
    returned as a StringStream instead of a ResponseStreamReader.
    """
    if revision and not revision.endswith(settings.DRIVE_IGNORE_VERSION):
        metadata = await self.metadata(path, revision=revision)
    else:
        metadata = await self.metadata(path)

    resp = await self.make_request(
        'GET',
        metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw),
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )

    if metadata.size is not None:
        return streams.ResponseStreamReader(resp, size=metadata.size)

    # google docs, not drive files, have no way to get the file size
    # must buffer the entire file into memory
    buffered = streams.StringStream(await resp.read())
    if resp.headers.get('Content-Type'):
        buffered.content_type = resp.headers['Content-Type']
    buffered.name = metadata.export_name
    return buffered
async def download(self):
    """Fetch the file from WaterButler and return it as a stream.

    Redirects are disabled on the initial request; if the server answers with
    301/302, that single redirect is followed manually.
    """
    download_url = await self._fetch_download_url()
    headers = {settings.MFR_IDENTIFYING_HEADER: '1'}
    resp = await self._make_request('GET', download_url, allow_redirects=False,
                                    headers=headers)

    if resp.status >= 400:
        body = await resp.text()
        logger.error('Unable to download file: ({}) {}'.format(resp.status, body))
        raise exceptions.DownloadError(
            'Unable to download the requested file, please try again later.',
            download_url=download_url,
            response=body,
            provider=self.NAME,
        )

    self.metrics.add('download.saw_redirect', False)
    if resp.status in (302, 301):
        await resp.release()
        resp = await aiohttp.request('GET', resp.headers['location'])
        self.metrics.add('download.saw_redirect', True)

    return streams.ResponseStreamReader(resp, unsizable=True)
async def download(self, path: BitbucketPath,  # type: ignore
                   range: Tuple[int, int]=None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Stream the specified file from Bitbucket.

    In BB API 2.0 the ``repo/username/repo_slug/src/node/path`` endpoint serves
    several purposes depending on the path type and query params:

    1) File download:    type is file,   no ``format=meta`` query param
    2) File metadata:    type is file,   with ``format=meta``
    3) Folder contents:  type is folder, no ``format=meta``
    4) Folder metadata:  type is folder, with ``format=meta``

    API Doc: https://developer.atlassian.com/bitbucket/api/2/reference/resource/repositories/%7Busername%7D/%7Brepo_slug%7D/src/%7Bnode%7D/%7Bpath%7D

    :param path: the BitbucketPath object of the file to be downloaded
    :param range: the range header
    """
    file_meta = await self.metadata(path)

    logger.debug('requested-range:: {}'.format(range))
    resp = await self.make_request(
        'GET',
        self._build_v2_repo_url('src', path.commit_sha, *path.path_tuple()),
        range=range,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    logger.debug('download-headers:: {}'.format([(x, resp.headers[x]) for x in resp.headers]))

    return streams.ResponseStreamReader(resp, size=file_meta.size)
def download(self, path, revision=None, **kwargs):
    """Stream a Google Drive file, optionally at a specific ``revision``.

    Google docs report no ``fileSize``, so they are buffered fully into memory
    and returned as a StringStream.
    """
    if revision and not revision.endswith(settings.DRIVE_IGNORE_VERSION):
        # Must make additional request to look up download URL for revision
        revision_resp = yield from self.make_request(
            'GET',
            self.build_url('files', path.identifier, 'revisions', revision, alt='json'),
            expects=(200, ),
            throws=exceptions.MetadataError,
        )
        data = yield from revision_resp.json()
    else:
        data = yield from self.metadata(path, raw=True)

    resp = yield from self.make_request(
        'GET',
        data.get('downloadUrl') or drive_utils.get_export_link(data['exportLinks']),
        expects=(200, ),
        throws=exceptions.DownloadError,
    )

    if 'fileSize' in data:
        return streams.ResponseStreamReader(resp, size=data['fileSize'])

    # google docs, not drive files, have no way to get the file size
    # must buffer the entire file into memory
    buffered = streams.StringStream((yield from resp.read()))
    if resp.headers.get('Content-Type'):
        buffered.content_type = resp.headers['Content-Type']
    return buffered
async def download(self,  # type: ignore
                   path: WaterButlerPath,
                   revision: str = None,
                   range: typing.Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Stream a file via the Dropbox content-download endpoint.

    When the response lacks ``Content-Length``, the size is read from the
    ``dropbox-api-result`` header; a 409 status carries a JSON conflict body
    that is routed through the provider's conflict handler.
    """
    if revision:
        dropbox_path = 'rev:' + revision
    else:
        dropbox_path = path.full_path

    resp = await self.make_request(
        'POST',
        self._build_content_url('files', 'download'),
        headers={'Dropbox-API-Arg': json.dumps({'path': dropbox_path}), 'Content-Type': ''},
        range=range,
        expects=(200, 206, 409,),
        throws=exceptions.DownloadError,
    )

    if resp.status == 409:
        data = await resp.json()
        self.dropbox_conflict_error_handler(data)

    size = None  # ResponseStreamReader will extract it from the resp
    if 'Content-Length' not in resp.headers:
        size = json.loads(resp.headers['dropbox-api-result'])['size']

    return streams.ResponseStreamReader(resp, size=size)
async def download(self, path, accept_url=False, range=None, **kwargs):
    """Create a stream for downloading files from the remote WebDAV host.

    :param waterbutler.core.path.WaterButlerPath path: user-supplied path to download
    :raises: `waterbutler.core.exceptions.DownloadError`
    """
    # NOTE(review): ``accept_url is False`` records True exactly when the
    # default was used, which looks inverted -- confirm this is intentional.
    self.metrics.add('download', {
        'got_accept_url': accept_url is False,
        'got_range': range is not None,
    })
    resp = await self.make_request(
        'GET',
        self._webdav_url_ + path.full_path,
        range=range,
        expects=(200, 206,),
        throws=exceptions.DownloadError,
        auth=self._auth,
        connector=self.connector(),
    )
    return streams.ResponseStreamReader(resp)
async def download(self, path: WaterButlerPath, revision: str=None,  # type: ignore
                   range: Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    r"""Return a stream for the Dataverse file at ``path``.

    :param WaterButlerPath path: Path to the file you want to download
    :param str revision: Used to verify if file is in selected dataset
        - 'latest' to check draft files
        - 'latest-published' to check published files
        - None to check all data
    :param Tuple[int, int] range: the range header
    :param dict \*\*kwargs: Additional arguments that are ignored
    :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    """
    if path.identifier is None:
        raise exceptions.NotFoundError(str(path))

    logger.debug('request-range:: {}'.format(range))
    url = self.build_url(pd_settings.DOWN_BASE_URL, path.identifier, key=self.token)
    resp = await self.make_request(
        'GET',
        url,
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp)
async def download(self,  # type: ignore
                   path: FigsharePath,
                   range: Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Download the file identified by ``path`` from this project.

    :param FigsharePath path: FigsharePath to file you want to download
    :rtype streams.ResponseStreamReader:
    """
    if not path.is_file:
        raise exceptions.NotFoundError(str(path))

    file_metadata = await self.metadata(path)
    download_url = file_metadata.extra['downloadUrl']  # type: ignore
    if download_url is None:
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)

    logger.debug('requested-range:: {}'.format(range))
    # Private files require the provider token as a query parameter.
    if file_metadata.is_public:  # type: ignore
        params = {}
    else:
        params = {'token': self.token}

    resp = await self.make_request('GET', download_url, range=range, params=params)
    if resp.status == 404:
        await resp.release()
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)
    return streams.ResponseStreamReader(resp)
async def download(self, path, accept_url=False, revision=None, range=None, **kwargs):
    r"""Return a ResponseWrapper (Stream) for the specified S3 key.

    :param str path: Path to the key you want to download
    :param dict \*\*kwargs: Additional arguments that are ignored
    :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    """
    await self._check_region()

    if not path.is_file:
        raise exceptions.DownloadError('No file specified for download', code=400)

    # 'latest' (or no revision) means the unversioned object.
    query_parameters = None
    if revision and revision.lower() != 'latest':
        query_parameters = {'versionId': revision}

    display_name = kwargs.get('displayName')
    if display_name:
        response_headers = {
            'response-content-disposition':
                "attachment; filename*=UTF-8''{}".format(parse.quote(display_name))
        }
    else:
        response_headers = {'response-content-disposition': 'attachment'}

    url = functools.partial(self.bucket.new_key(path.path).generate_url,
                            settings.TEMP_URL_SECS,
                            query_parameters=query_parameters,
                            response_headers=response_headers)
    if accept_url:
        return url()

    resp = await self.make_request(
        'GET',
        url,
        range=range,
        expects=(200, 206,),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp)
async def download(self,  # type: ignore
                   path: GitLabPath,
                   **kwargs):
    """Return a stream to the specified file on GitLab.

    There is an endpoint for downloading the raw file directly, but we cannot use it
    because GitLab requires periods in the file path to be encoded.  Python and aiohttp
    make this difficult, though their behavior is arguably correct. See
    https://gitlab.com/gitlab-org/gitlab-ce/issues/31470 for details. (Update: this is
    due to be fixed in the GL 10.0 release)

    API docs: https://docs.gitlab.com/ce/api/repository_files.html#get-file-from-repository

    This uses the same endpoint as `_fetch_file_contents`, but relies on the response
    headers, which are not returned by that method.  It may also be replaced when the
    above bug is fixed.

    :param str path: The path to the file on GitLab
    :param dict \*\*kwargs: Ignored
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    """
    url = self._build_file_url(path)
    resp = await self.make_request(
        'GET',
        url,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    # Endpoint returns JSON with the file content base64-encoded in 'content'.
    raw_data = (await resp.read()).decode("utf-8")
    data = None
    try:
        data = json.loads(raw_data)
    except json.decoder.JSONDecodeError:
        # GitLab API sometimes returns ruby hashes instead of json
        # see: https://gitlab.com/gitlab-org/gitlab-ce/issues/31790
        # fixed in GL v9.5
        data = self._convert_ruby_hash_to_dict(raw_data)
    raw = base64.b64decode(data['content'])

    # The response headers describe the JSON envelope, not the decoded file, so
    # patch in a guessed Content-Type before handing the response to the reader.
    mdict_options = {}
    mimetype = mimetypes.guess_type(path.full_path)[0]
    if mimetype is not None:
        mdict_options['CONTENT-TYPE'] = mimetype
    mdict = aiohttp.multidict.MultiDict(resp.headers)
    mdict.update(mdict_options)
    resp.headers = mdict
    # Replace the (already-consumed) body with the decoded bytes.
    resp.content = streams.StringStream(raw)
    return streams.ResponseStreamReader(resp, len(raw))
def download(self):
    """Resolve the download URL, fetch it, and return a stream.

    Redirects are disabled on the initial request; a single 301/302 redirect
    is followed manually.
    """
    url = yield from self._fetch_download_url()
    resp = yield from self._make_request('GET', url, allow_redirects=False)
    if resp.status >= 400:
        raise exceptions.ProviderError(
            'Unable to download the requested file, please try again later.',
            code=resp.status)
    if resp.status in (302, 301):
        resp = yield from aiohttp.request('GET', resp.headers['location'])
    return streams.ResponseStreamReader(resp, unsizable=True)
async def download(self):
    """Fetch ``self.url`` and return the response as a stream.

    :raises: :class:`exceptions.DownloadError` on any 4xx/5xx status, with the
        response body attached for debugging
    """
    resp = await aiohttp.request('GET', self.url)
    if resp.status >= 400:
        body = await resp.read()
        logger.error('Unable to download file: ({}) {}'.format(
            resp.status, body.decode('utf-8')))
        raise exceptions.DownloadError(
            'Unable to download the requested file, please try again later.',
            download_url=self.url,
            response=await resp.text(),
            code=resp.status,
            provider='http',
        )
    return streams.ResponseStreamReader(resp)
async def download(self, path: BitbucketPath, **kwargs):  # type: ignore
    """Get the stream to the specified file on bitbucket

    :param str path: The path to the file on bitbucket
    """
    file_meta = await self.metadata(path)
    resp = await self.make_request(
        'GET',
        self._build_v1_repo_url('raw', path.commit_sha, *path.path_tuple()),
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp, size=file_meta.size)
async def download(self,  # type: ignore
                   path: GoogleDrivePath,
                   revision: str = None,
                   range: Tuple[int, int] = None,
                   **kwargs) -> streams.BaseStream:
    """Download the file at `path`.  If `revision` is present, attempt to download
    that revision of the file.  See **Revisions** in the class doctring for an
    explanation of this provider's revision handling.  The actual revision handling
    is done in `_file_metadata()`.

    Quirks: Google docs don't have a size until they're exported, so WB must
    download them, then re-stream them as a StringStream.

    :param GoogleDrivePath path: the file to download
    :param str revision: the id of a particular version to download
    :param tuple(int, int) range: range of bytes to download in this request
    :rtype: streams.ResponseStreamReader
    :rtype: streams.StringStream
    :returns: For GDocs, a StringStream.  All others, a ResponseStreamReader.
    """
    file_meta = await self.metadata(path, revision=revision)

    resp = await self.make_request(
        'GET',
        file_meta.raw.get('downloadUrl') or utils.get_export_link(file_meta.raw),  # type: ignore
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )

    if file_meta.size is not None:  # type: ignore
        return streams.ResponseStreamReader(resp, size=file_meta.size_as_int)  # type: ignore

    # google docs, not drive files, have no way to get the file size
    # must buffer the entire file into memory
    gdoc_stream = streams.StringStream(await resp.read())
    if resp.headers.get('Content-Type'):
        # TODO: Add these properties to base class officially, instead of as one-off
        gdoc_stream.content_type = resp.headers['Content-Type']  # type: ignore
    gdoc_stream.name = file_meta.export_name  # type: ignore
    return gdoc_stream
def download(self, path, revision=None, range=None, **kwargs):
    """Stream the stored file at ``path``, optionally at ``revision``.

    :raises: :class:`exceptions.DownloadError` when the path has no identifier
    """
    if path.identifier is None:
        raise exceptions.DownloadError('"{}" not found'.format(str(path)), code=404)

    version_query = {}
    if revision and revision != path.identifier:
        version_query['version'] = revision

    resp = yield from self.make_request(
        'GET',
        self.build_url('files', path.identifier, 'content', **version_query),
        range=range,
        expects=(200, 206),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp)
async def download(self,  # type: ignore
                   path: WaterButlerPath,
                   revision: str = None,
                   range: typing.Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Stream a file via the Dropbox V2 content-download endpoint.

    API docs:
      Files Download:
        https://www.dropbox.com/developers/documentation/http/documentation#files-download
      Content-download endpoints:
        https://www.dropbox.com/developers/documentation/http/documentation#formats

    Per the docs, the file content is contained in the response body, while the
    result (metadata about the file) appears as JSON in the "Dropbox-API-Result"
    response header.  That header supplies the size (in bytes) that
    ``ResponseStreamReader`` needs whenever "Content-Length" is not provided.
    """
    if revision:
        dropbox_path = 'rev:' + revision
    else:
        dropbox_path = path.full_path

    resp = await self.make_request(
        'POST',
        self._build_content_url('files', 'download'),
        headers={'Dropbox-API-Arg': json.dumps({'path': dropbox_path}), 'Content-Type': ''},
        range=range,
        expects=(200, 206, 409,),
        throws=core_exceptions.DownloadError,
    )

    if resp.status == 409:
        data = await resp.json()
        self.dropbox_conflict_error_handler(data)

    size = None  # ResponseStreamReader will extract it from the resp
    if 'Content-Length' not in resp.headers:
        size = json.loads(resp.headers['dropbox-api-result'])['size']

    return streams.ResponseStreamReader(resp, size=size)
async def download(self, path, revision=None, **kwargs):
    """Get the stream to the specified file on github

    :param str path: The path to the file on github
    :param str ref: The git 'ref' a branch or commit sha at which to get the file from
    :param str fileSha: The sha of file to be downloaded if specifed path will be ignored
    :param dict kwargs: Ignored
    """
    file_meta = await self.metadata(path, revision=revision)
    blob_sha = path.file_sha or file_meta.extra['fileSha']

    resp = await self.make_request(
        'GET',
        self.build_repo_url('git', 'blobs', blob_sha),
        headers={'Accept': 'application/vnd.github.v3.raw'},
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp, size=file_meta.size)
def download(self, path, revision=None, **kwargs):
    """Stream a Dropbox (v1 API) file, optionally at ``revision``.

    Size is read from the ``X-DROPBOX-METADATA`` header when the response has
    no ``Content-Length``.
    """
    if revision:
        url = self._build_content_url('files', 'auto', path.full_path, rev=revision)
    else:
        # Dont add unused query parameters
        url = self._build_content_url('files', 'auto', path.full_path)

    resp = yield from self.make_request(
        'GET',
        url,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )

    size = None
    if 'Content-Length' not in resp.headers:
        size = json.loads(resp.headers['X-DROPBOX-METADATA'])['bytes']
    return streams.ResponseStreamReader(resp, size=size)
def download(self, path, **kwargs):
    """Download a file.

    Note: Although Figshare may return a download URL, the `accept_url`
    parameter is ignored here, since Figshare does not support HTTPS for
    downloads.

    :param str path: Path to the key you want to download
    :rtype ResponseWrapper:
    """
    if path.identifier is None:
        raise exceptions.NotFoundError(str(path))

    file_metadata = yield from self.metadata(path)
    url = file_metadata.extra['downloadUrl']
    if url is None:
        raise exceptions.DownloadError(
            'Cannot download private files',
            code=http.client.FORBIDDEN,
        )
    resp = yield from aiohttp.request('GET', url)
    return streams.ResponseStreamReader(resp)
async def download(self, path: BitbucketPath,  # type: ignore
                   range: Tuple[int, int]=None,
                   **kwargs) -> streams.ResponseStreamReader:
    """Stream the specified file from Bitbucket.

    :param path: The path to the file on Bitbucket
    :param range: the range header
    """
    file_meta = await self.metadata(path)

    logger.debug('requested-range:: {}'.format(range))
    resp = await self.make_request(
        'GET',
        self._build_v1_repo_url('raw', path.commit_sha, *path.path_tuple()),
        range=range,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    logger.debug('download-headers:: {}'.format([(x, resp.headers[x]) for x in resp.headers]))
    return streams.ResponseStreamReader(resp, size=file_meta.size)
def download(self, path, accept_url=False, **kwargs):
    """Returns a ResponseStreamReader (Stream) for the specified path

    :param str path: Path to the object you want to download
    :param dict **kwargs: Additional arguments that are ignored
    :rtype str:
    :rtype ResponseStreamReader:
    :raises: exceptions.DownloadError
    """
    if accept_url:
        # Hand back a signed public URL instead of streaming the content.
        signed = furl.furl(self.sign_url(path, endpoint=self.public_endpoint))
        signed.args['filename'] = kwargs.get('displayName') or path.name
        return signed.url

    resp = yield from self.make_request(
        'GET',
        self.sign_url(path),
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp)
async def download(self, path, **kwargs):
    """Download the file identified by ``path`` from this project.

    :param FigsharePath path: FigsharePath to file you want to download
    :rtype ResponseStreamReader:
    """
    if not path.is_file:
        raise exceptions.NotFoundError(str(path))

    file_metadata = await self.metadata(path)
    url = file_metadata.extra['downloadUrl']
    if url is None:
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)

    # Private files require the provider token as a query parameter.
    if file_metadata.is_public:
        params = {}
    else:
        params = {'token': self.token}

    resp = await aiohttp.request('GET', url, params=params)
    if resp.status == 404:
        await resp.release()
        raise exceptions.DownloadError('Download not available', code=HTTPStatus.FORBIDDEN)
    return streams.ResponseStreamReader(resp)
async def download(self, path: GitLabPath, **kwargs):  # type: ignore
    r"""Return a stream to the specified file on GitLab.

    API Docs: https://docs.gitlab.com/ce/api/repository_files.html#get-raw-file-from-repository

    Historically this method was implemented using a different endpoint which returned
    the file data as a base-64 encoded string. We used this endpoint because the one
    listed above was buggy (see: https://gitlab.com/gitlab-org/gitlab-ce/issues/31470).
    That issue has since been fixed in GL. We removed the workaround since it required
    slurping the file contents into memory. As a side-effect, the Gitlab download()
    method no longer supports the Range header. It had been manually implemented by
    array slicing the slurped data. The raw file endpoint does not currently respect it.

    :param str path: The path to the file on GitLab
    :param dict \*\*kwargs: Ignored
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    """
    # BUG FIX: this method has no ``range`` parameter, so the old
    # ``.format(range)`` logged the *builtin* ``range`` type.  Log the
    # (ignored) kwarg instead, so the message reflects what the caller sent.
    logger.debug('requested-range:: {}'.format(kwargs.get('range')))

    url = self._build_file_url(path, raw=True)
    resp = await self.make_request(
        'GET',
        url,
        expects=(200, 206, ),
        throws=exceptions.DownloadError,
    )
    logger.debug('download-headers:: {}'.format([(x, resp.headers[x]) for x in resp.headers]))

    # get size from X-Gitlab-Size header, since some responses don't set Content-Length
    return streams.ResponseStreamReader(resp, size=int(resp.headers['X-Gitlab-Size']))
async def download(self, path: GitHubPath, range: Tuple[int, int]=None,  # type: ignore
                   revision=None, **kwargs) -> streams.ResponseStreamReader:
    """Get the stream to the specified file on github

    :param GitHubPath path: The path to the file on github
    :param range: The range header
    :param revision:
    :param dict kwargs: Additional kwargs are ignored
    """
    file_meta = await self.metadata(path, revision=revision)
    blob_sha = path.file_sha or file_meta.extra['fileSha']

    logger.debug('requested-range:: {}'.format(range))
    resp = await self.make_request(
        'GET',
        self.build_repo_url('git', 'blobs', blob_sha),
        headers={'Accept': 'application/vnd.github.v3.raw'},
        range=range,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp, size=file_meta.size)
def download(self, path, revision=None, **kwargs):
    r"""Return a stream for the Dataverse file at ``path``.

    :param str path: Path to the file you want to download
    :param str revision: Used to verify if file is in selected dataset
        - 'latest' to check draft files
        - 'latest-published' to check published files
        - None to check all data
    :param dict \*\*kwargs: Additional arguments that are ignored
    :rtype: :class:`waterbutler.core.streams.ResponseStreamReader`
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    """
    if path.identifier is None:
        raise exceptions.NotFoundError(str(path))

    url = self.build_url(settings.DOWN_BASE_URL, path.identifier, key=self.token)
    resp = yield from self.make_request(
        'GET',
        url,
        expects=(200, ),
        throws=exceptions.DownloadError,
    )
    return streams.ResponseStreamReader(resp)
async def download(self, path, accept_url=False, range=None, **kwargs): """Returns a ResponseStreamReader (Stream) for the specified path :param str path: Path to the object you want to download :param dict \*\*kwargs: Additional arguments that are ignored :rtype str: :rtype ResponseStreamReader: :raises: exceptions.DownloadError """ self.metrics.add('download.accept_url', accept_url) if accept_url: parsed_url = furl.furl( self.sign_url(path, endpoint=self.public_endpoint)) parsed_url.args['filename'] = kwargs.get( 'displayName') or path.name return parsed_url.url resp = await self.make_request( 'GET', functools.partial(self.sign_url, path), range=range, expects=(200, 206), throws=exceptions.DownloadError, ) return streams.ResponseStreamReader(resp)
async def download(self,  # type: ignore
                   path: OneDrivePath,
                   revision: str = None,
                   range: typing.Tuple[int, int] = None,
                   **kwargs) -> streams.ResponseStreamReader:
    r"""Download the file identified by ``path``.  If ``revision`` is not ``None``, get
    the file at the version identified by ``revision``.

    API docs: https://dev.onedrive.com/items/download.htm

    :param str path: The path to the file on OneDrive
    :param str revision: The revision of the file to download. If ``None``, download latest.
    :param dict \*\*kwargs: Ignored
    :raises: :class:`waterbutler.core.exceptions.DownloadError`
    :rtype: waterbutler.core.streams.ResponseStreamReader
    :return: a stream of the contents of the file
    """
    logger.debug('download path::{} path.identifier::{} revision::{} range::{} '
                 'kwargs::{}'.format(path, path.identifier, revision, range, kwargs))

    if path.identifier is None:
        raise exceptions.DownloadError('"{}" not found'.format(str(path)), code=404)

    download_url = None
    if revision:
        # Scan the revision list for the requested version; its entry carries
        # a pre-authorized download link.
        items = await self._revisions_json(path)
        for item in items['value']:
            if item['id'] == revision:
                try:
                    download_url = item['@content.downloadUrl']
                except KeyError:
                    # No download link on the revision entry: the file type
                    # cannot be exported.
                    raise exceptions.UnexportableFileTypeError(str(path))
                break
    else:
        # TODO: we should be able to get the download url from validate_v1_path
        metadata_resp = await self.make_request(
            'GET',
            self._build_drive_url(*path.api_identifier),
            expects=(200, ),
            throws=exceptions.MetadataError)
        logger.debug('download metadata_resp::{}'.format(repr(metadata_resp)))
        metadata = await metadata_resp.json()
        logger.debug('download metadata::{}'.format(json.dumps(metadata)))

        # OneNote packages are not downloadable through this endpoint.
        try:
            package_type = metadata['package']['type']
        except KeyError:
            pass
        else:
            if package_type == 'oneNote':
                raise exceptions.UnexportableFileTypeError(str(path))

        download_url = metadata.get('@content.downloadUrl', None)

    if download_url is None:
        raise exceptions.NotFoundError(str(path))

    logger.debug('download download_url::{}'.format(download_url))
    download_resp = await self.make_request(
        'GET',
        download_url,
        range=range,
        expects=(200, 206),
        headers={'accept-encoding': ''},
        throws=exceptions.DownloadError,
    )
    logger.debug('download download_resp::{}'.format(repr(download_resp)))
    return streams.ResponseStreamReader(download_resp)