Example #1
    async def upload(self, stream, path, conflict='replace', **kwargs):
        """Upload a file to provider root or to an article whose defined_type is
        configured to represent a folder.

        :param asyncio.StreamReader stream: stream to upload
        :param FigsharePath path: FigsharePath to upload the file to.
        :param dict \*\*kwargs: Will be passed to returned metadata object
        """
        path, exists = await self.handle_name_conflict(path, conflict=conflict)
        if not path.parent.is_root:
            parent_resp = await self.make_request(
                'GET',
                self.build_url(False, *self.root_path_parts, 'articles',
                               path.parent.identifier),
                expects=(200, ),
            )
            parent_json = await parent_resp.json()
            if parent_json['defined_type'] not in settings.FOLDER_TYPES:
                del path._parts[1]

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
        file_id = await self._upload_file(self.container_id, path.name, stream)

        # Build new file path and return metadata
        path = FigsharePath('/' + file_id,
                            _ids=('', file_id),
                            folder=False,
                            is_public=False)
        metadata = await self.metadata(path, **kwargs)
        if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return metadata, True
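
Every example on this page follows the same pattern: a HashStreamWriter is attached to the upload stream before the request is made, the digest accumulates as the provider consumes the stream, and the resulting hexdigest is checked against the hash reported by the storage service. A minimal sketch of the idea behind such a writer (illustrative only, not WaterButler's actual implementation):

    import hashlib

    # Illustrative stand-in for streams.HashStreamWriter: a write-through
    # "tee" that feeds every chunk into a hashlib object as the stream is
    # consumed, exposing the digest as a property afterwards.
    class HashWriterSketch:
        def __init__(self, hash_factory):
            self._hash = hash_factory()

        def write(self, chunk: bytes):
            self._hash.update(chunk)

        @property
        def hexdigest(self) -> str:
            return self._hash.hexdigest()

    writer = HashWriterSketch(hashlib.md5)
    writer.write(b'hello ')
    writer.write(b'world')
    assert writer.hexdigest == hashlib.md5(b'hello world').hexdigest()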
Example #2
    async def _create_blob(self, stream):
        blob_stream = streams.JSONStream({
            'encoding': 'base64',
            'content': streams.Base64EncodeStream(stream),
        })

        sha1_calculator = streams.HashStreamWriter(hashlib.sha1)
        stream.add_writer('sha1', sha1_calculator)
        git_blob_header = 'blob {}\0'.format(stream.size)
        sha1_calculator.write(git_blob_header.encode('utf-8'))

        resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'blobs'),
            data=blob_stream,
            headers={
                'Content-Type': 'application/json',
                'Content-Length': str(blob_stream.size),
            },
            expects=(201, ),
            throws=exceptions.UploadError,
        )

        blob_metadata = await resp.json()
        if stream.writers['sha1'].hexdigest != blob_metadata['sha']:
            raise exceptions.UploadChecksumMismatchError()

        return blob_metadata
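
Seeding the hasher with git_blob_header is what makes the local SHA-1 comparable to the sha GitHub returns: Git identifies a blob by the SHA-1 of the header 'blob <size>\0' followed by the content. A self-contained check of that rule:

    import hashlib

    def git_blob_sha1(content: bytes) -> str:
        # Git identifies a blob by sha1(b'blob <size>\0' + content), which is
        # why the example above seeds the hasher with the header first.
        header = 'blob {}\0'.format(len(content)).encode('utf-8')
        return hashlib.sha1(header + content).hexdigest()

    # The id `git hash-object` prints for a file containing b'hello\n':
    assert git_blob_sha1(b'hello\n') == 'ce013625030ba8dba906f756967f9e9ca394464a'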
Example #3
    async def _contiguous_upload(self, stream, path):
        """Uploads the given stream in one request.
        """

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        headers = {'Content-Length': str(stream.size)}
        # this is usually set in boto.s3.key.generate_url, but do it here
        # to be explicit about our header payloads for signing purposes
        if self.encrypt_uploads:
            headers['x-amz-server-side-encryption'] = 'AES256'
        upload_url = functools.partial(
            self.bucket.new_key(path.path).generate_url,
            settings.TEMP_URL_SECS,
            'PUT',
            headers=headers,
        )

        resp = await self.make_request(
            'PUT',
            upload_url,
            data=stream,
            skip_auto_headers={'CONTENT-TYPE'},
            headers=headers,
            expects=(200, 201, ),
            throws=exceptions.UploadError,
        )
        await resp.release()

        # md5 is returned as ETag header as long as server side encryption is not used.
        if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
            raise exceptions.UploadChecksumMismatchError()
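
The closing comparison leans on a documented S3 behavior: for a single-part PUT that does not use SSE-KMS or SSE-C, the returned ETag is the object's MD5 digest (multipart ETags contain a '-' and are not plain digests). A small helper capturing that rule, written here only for illustration:

    import hashlib

    def etag_matches(local_md5_hex: str, etag_header: str) -> bool:
        # Per AWS docs, a single-part upload without SSE-KMS/SSE-C yields an
        # ETag equal to the object's MD5; multipart ETags contain a '-' and
        # cannot be compared this way.
        etag = etag_header.replace('"', '')
        if '-' in etag:
            raise ValueError('multipart ETag is not a plain MD5')
        return local_md5_hex == etag

    digest = hashlib.md5(b'payload').hexdigest()
    assert etag_matches(digest, '"{}"'.format(digest))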
Example #4
    async def upload(self, stream, path, **kwargs):
        """Zips the given stream then uploads to Dataverse.
        This will delete existing draft files with the same name.

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
        :param str path: The filename prepended with '/'

        :rtype: dict, bool
        """

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        zip_stream = streams.ZipStreamReader(
            AsyncIterator([(path.name, stream)]))

        # Write stream to disk (Necessary to find zip file size)
        f = tempfile.TemporaryFile()
        chunk = await zip_stream.read()
        while chunk:
            f.write(chunk)
            chunk = await zip_stream.read()
        file_stream = streams.FileStreamReader(f)

        dv_headers = {
            "Content-Disposition": "filename=temp.zip",
            "Content-Type": "application/zip",
            "Packaging": "http://purl.org/net/sword/package/SimpleZip",
            "Content-Length": str(file_stream.size),
        }

        # Delete old file if it exists
        if path.identifier:
            await self.delete(path)

        resp = await self.make_request('POST',
                                       self.build_url(
                                           settings.EDIT_MEDIA_BASE_URL,
                                           'study', self.doi),
                                       headers=dv_headers,
                                       auth=(self.token, ),
                                       data=file_stream,
                                       expects=(201, ),
                                       throws=exceptions.UploadError)
        await resp.release()

        # Find appropriate version of file
        metadata = await self._get_data('latest')
        files = metadata if isinstance(metadata, list) else []
        file_metadata = next(file for file in files if file.name == path.name)

        if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return file_metadata, path.identifier is None
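
The detour through tempfile exists because ZipStreamReader's output size is unknown until the archive has been fully written, and Dataverse's SWORD endpoint needs a Content-Length up front. The same spool-to-disk step in isolation, with made-up archive contents:

    import os
    import tempfile
    import zipfile

    # Spool a freshly built zip to disk purely to learn its size before
    # sending a Content-Length header; the archive contents are invented.
    with tempfile.TemporaryFile() as tmp:
        with zipfile.ZipFile(tmp, 'w') as archive:
            archive.writestr('data.txt', b'example payload')
        size = tmp.seek(0, os.SEEK_END)  # total bytes written
        tmp.seek(0)                      # rewind before streaming it out
        print('Content-Length would be', size)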
Example #5
    async def upload(self, stream, path, conflict='replace', **kwargs):
        """Upload a file to provider root or to an article whose defined_type is
        configured to represent a folder.

        :param asyncio.StreamReader stream: stream to upload
        :param FigsharePath path: FigsharePath to upload the file to.
        :param dict \*\*kwargs: Will be passed to returned metadata object
        """
        if path.identifier and conflict == 'replace':
            raise exceptions.UnsupportedOperationError(
                'Files in Figshare cannot be updated')

        path, exists = await self.handle_name_conflict(path, conflict=conflict)
        if not path.parent.is_root:
            parent_resp = await self.make_request(
                'GET',
                self.build_url(False, *self.root_path_parts, 'articles',
                               path.parent.identifier),
                expects=(200, ),
            )
            parent_json = await parent_resp.json()
            if parent_json['defined_type'] not in settings.FOLDER_TYPES:
                del path._parts[1]

        # Create article or retrieve article_id from existing article
        if not path.parent.is_root:
            article_id = path.parent.identifier
        else:
            article_name = json.dumps({'title': path.name})
            if self.container_type == 'project':
                article_id = await self._create_article(article_name)
            elif self.container_type == 'collection':
                # TODO don't think this is correct.  Probably should POST to /accounts/articles
                article_id = await self._create_article(article_name)
                article_list = json.dumps({'articles': [article_id]})
                await self.make_request(
                    'POST',
                    self.build_url(False, *self.root_path_parts, 'articles'),
                    data=article_list,
                    expects=(201, ),
                )

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
        file_id = await self._upload_file(article_id, path.name, stream)

        # Build new file path and return metadata
        path = FigsharePath('/' + article_id + '/' + file_id,
                            _ids=(self.container_id, article_id, file_id),
                            folder=False,
                            is_public=False)
        metadata = await self.metadata(path, **kwargs)
        if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return metadata, True
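
handle_name_conflict applies WaterButler's usual conflict convention: 'replace' keeps the colliding path, while 'keep' renames the incoming file until the name is free (here 'replace' is rejected outright because Figshare files cannot be updated). A simplified sketch of that policy; the real helper inserts the counter before the file extension rather than appending it:

    # Simplified 'replace' vs 'keep' policy, for illustration only.
    def resolve_conflict(name: str, existing: set, conflict: str = 'replace') -> str:
        if conflict == 'replace':
            return name          # reuse (and overwrite) the colliding name
        candidate, counter = name, 1
        while candidate in existing:
            candidate = '{} ({})'.format(name, counter)
            counter += 1
        return candidate

    assert resolve_conflict('a.txt', {'a.txt'}, 'replace') == 'a.txt'
    assert resolve_conflict('a.txt', {'a.txt'}, 'keep') == 'a.txt (1)'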
Example #6
    async def upload(self, stream, path, conflict='replace', **kwargs):
        """Uploads the given stream to S3

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to S3
        :param str path: The full path of the key to upload to/into

        :rtype: dict, bool
        """
        await self._check_region()

        path, exists = await self.handle_name_conflict(path, conflict=conflict)
        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        headers = {'Content-Length': str(stream.size)}

        # this is usually set in boto.s3.key.generate_url, but do it here
        # to be explicit about our header payloads for signing purposes
        if self.encrypt_uploads:
            headers['x-amz-server-side-encryption'] = 'AES256'

        upload_url = functools.partial(
            self.bucket.new_key(path.path).generate_url,
            settings.TEMP_URL_SECS,
            'PUT',
            headers=headers,
        )
        resp = await self.make_request(
            'PUT',
            upload_url,
            data=stream,
            skip_auto_headers={'CONTENT-TYPE'},
            headers=headers,
            expects=(200, 201, ),
            throws=exceptions.UploadError,
        )
        # md5 is returned as ETag header as long as server side encryption is not used.
        if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
            raise exceptions.UploadChecksumMismatchError()

        await resp.release()
        return (await self.metadata(path, **kwargs)), not exists
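
upload_url is passed as a functools.partial rather than a finished string, which defers the actual URL signing until the request layer invokes the callable, so a fresh signed URL (with a new expiry) can be produced at request time. The mechanism in miniature, with a hypothetical signer standing in for generate_url:

    import functools
    import time

    def sign_url_sketch(path: str, ttl: int) -> str:
        # Hypothetical signer standing in for bucket.new_key(...).generate_url.
        return 'https://bucket.example/{}?Expires={}'.format(path, int(time.time()) + ttl)

    upload_url = functools.partial(sign_url_sketch, 'key.txt', 60)
    print(upload_url())  # evaluated lazily, at request (or retry) time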
Example #7
    async def upload(
            self,  # type: ignore
            stream: streams.BaseStream,
            path: WaterButlerPath,
            conflict: str = 'replace',
            **kwargs) -> Tuple[BoxFileMetadata, bool]:
        if path.identifier and conflict == 'keep':
            path, _ = await self.handle_name_conflict(path,
                                                      conflict=conflict,
                                                      kind='folder')
            path._parts[-1]._id = None

        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        data_stream = streams.FormDataStream(
            attributes=json.dumps({
                'name': path.name,
                'parent': {
                    'id': path.parent.identifier
                }
            }))
        data_stream.add_file('file',
                             stream,
                             path.name,
                             disposition='form-data')

        async with self.request(
                'POST',
                self._build_upload_url(
                    *filter(lambda x: x is not None, ('files', path.identifier,
                                                      'content'))),
                data=data_stream,
                headers=data_stream.headers,
                expects=(201, ),
                throws=exceptions.UploadError,
        ) as resp:
            data = await resp.json()

        entry = data['entries'][0]
        if stream.writers['sha1'].hexdigest != entry['sha1']:
            raise exceptions.UploadChecksumMismatchError()

        created = path.identifier is None
        path._parts[-1]._id = entry['id']
        return BoxFileMetadata(entry, path), created
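
The filter(lambda x: x is not None, ...) call builds two different Box endpoints from one tuple: with an existing identifier the request targets files/<id>/content and uploads a new version, without one it targets files/content and creates a new file. The same idiom spelled out:

    def upload_segments(identifier):
        # Mirrors the filter(...) call above: drop the None segment.
        return [s for s in ('files', identifier, 'content') if s is not None]

    assert upload_segments('123') == ['files', '123', 'content']  # new version
    assert upload_segments(None) == ['files', 'content']          # new file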
Example #8
    async def _contiguous_upload(self, path: WaterButlerPath,
                                 stream: streams.BaseStream) -> dict:
        """Upload a file to Box using a single request. This will only be called if the file is
        smaller than the ``NONCHUNKED_UPLOAD_LIMIT``.

        API Docs: https://developer.box.com/reference#upload-a-file
        """
        assert stream.size <= self.NONCHUNKED_UPLOAD_LIMIT
        stream.add_writer('sha1', streams.HashStreamWriter(hashlib.sha1))

        data_stream = streams.FormDataStream(
            attributes=json.dumps({
                'name': path.name,
                'parent': {
                    'id': path.parent.identifier
                }
            }))
        data_stream.add_file('file',
                             stream,
                             path.name,
                             disposition='form-data')

        if path.identifier is not None:
            segments = ['files', path.identifier, 'content']
        else:
            segments = ['files', 'content']

        response = await self.make_request(
            'POST',
            self._build_upload_url(*segments),
            data=data_stream,
            headers=data_stream.headers,
            expects=(201, ),
            throws=exceptions.UploadError,
        )
        data = await response.json()

        entry = data['entries'][0]
        if stream.writers['sha1'].hexdigest != entry['sha1']:
            raise exceptions.UploadChecksumMismatchError()

        return entry
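
The assert records the contract stated in the docstring: this code path only handles streams at or under NONCHUNKED_UPLOAD_LIMIT, and anything larger would go through Box's chunked-upload session API instead. A sketch of that size-based dispatch, with an assumed threshold value:

    # The 50 MB threshold below is assumed for illustration; the real limit
    # comes from the provider's NONCHUNKED_UPLOAD_LIMIT setting.
    NONCHUNKED_UPLOAD_LIMIT = 50 * 1024 * 1024

    def pick_upload_strategy(size: int) -> str:
        return 'contiguous' if size <= NONCHUNKED_UPLOAD_LIMIT else 'chunked'

    assert pick_upload_strategy(1024) == 'contiguous'
    assert pick_upload_strategy(NONCHUNKED_UPLOAD_LIMIT + 1) == 'chunked'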
Example #9
    async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \
            -> typing.Tuple[GoogleDriveFileMetadata, bool]:
        assert path.is_file

        if path.identifier:
            segments = [path.identifier]
        else:
            segments = []

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        upload_metadata = self._build_upload_metadata(path.parent.identifier, path.name)
        upload_id = await self._start_resumable_upload(not path.identifier, segments, stream.size,
                                                       upload_metadata)
        data = await self._finish_resumable_upload(segments, stream, upload_id)

        if data['md5Checksum'] != stream.writers['md5'].hexdigest:
            raise exceptions.UploadChecksumMismatchError()

        return GoogleDriveFileMetadata(data, path), path.identifier is None
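
_start_resumable_upload and _finish_resumable_upload wrap Google Drive's two-phase resumable protocol: a metadata request opens an upload session and returns a session URI, then the file bytes are sent to that URI. A hedged, synchronous illustration of the first phase against the Drive v3 endpoint (not WaterButler's code; requests is used purely for demonstration):

    import json

    import requests

    def start_resumable_session(token: str, name: str, parent_id: str, size: int) -> str:
        # Open a resumable-upload session; Drive answers with the session URI
        # in the Location header, and the file bytes are PUT there afterwards.
        resp = requests.post(
            'https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable',
            headers={
                'Authorization': 'Bearer ' + token,
                'Content-Type': 'application/json; charset=UTF-8',
                'X-Upload-Content-Length': str(size),
            },
            data=json.dumps({'name': name, 'parents': [parent_id]}),
        )
        resp.raise_for_status()
        return resp.headers['Location']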
Example #10
    async def upload(self,
                     stream,
                     path,
                     check_created=True,
                     fetch_metadata=True,
                     **kwargs):
        """Uploads the given stream to CloudFiles
        :param ResponseStreamReader stream: The stream to put to CloudFiles
        :param str path: The full path of the object to upload to/into
        :rtype ResponseStreamReader:
        """
        if check_created:
            created = not (await self.exists(path))
        else:
            created = None
        self.metrics.add('upload.check_created', check_created)

        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
        resp = await self.make_request(
            'PUT',
            functools.partial(self.sign_url, path, 'PUT'),
            data=stream,
            headers={'Content-Length': str(stream.size)},
            expects=(200, 201),
            throws=exceptions.UploadError,
        )
        await resp.release()
        # md5 is returned as ETag header as long as server side encryption is not used.
        if stream.writers['md5'].hexdigest != resp.headers['ETag'].replace('"', ''):
            raise exceptions.UploadChecksumMismatchError()

        if fetch_metadata:
            metadata = await self.metadata(path)
        else:
            metadata = None
        self.metrics.add('upload.fetch_metadata', fetch_metadata)

        return metadata, created
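
check_created and fetch_metadata each buy information at the cost of an extra round-trip: an existence check before the PUT and a metadata fetch after it. A caller that needs neither can disable both and accept None for the corresponding return values, as in this hypothetical usage:

    # Hypothetical caller; 'provider', 'stream' and 'path' are assumed to be
    # set up elsewhere.
    async def fast_upload(provider, stream, path):
        metadata, created = await provider.upload(
            stream, path, check_created=False, fetch_metadata=False)
        return metadata, created  # (None, None) on this code path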