Example #1
import atheris
from werkzeug import http as whttp  # module alias inferred from the calls below


def TestOneInput(data):
  fdp = atheris.FuzzedDataProvider(data)
  whttp.parse_content_range_header(fdp.ConsumeUnicode(100))
  whttp.parse_range_header(fdp.ConsumeUnicode(100))
  whttp.parse_set_header(fdp.ConsumeUnicode(100))
  whttp.parse_etags(fdp.ConsumeUnicode(100))
  whttp.parse_if_range_header(fdp.ConsumeUnicode(100))
  whttp.parse_dict_header(fdp.ConsumeUnicode(100))
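
The function above is only the fuzz target; to actually run it, Atheris needs its standard driver. A minimal sketch of that entry point (assumed here, not part of the original snippet, and relying on the imports added above):

import sys

if __name__ == "__main__":
  atheris.Setup(sys.argv, TestOneInput)
  atheris.Fuzz()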
Example #2
 def _put_log(self, log_file, req):
     if req.content_length is None:
         raise LengthRequired()
     content_range = parse_content_range_header(req.headers.get('Content-Range'))
     if content_range:
         # a few sanity checks
         if req.content_length != (content_range.stop - content_range.start):
             raise BadRequest('Content length does not match range length')
         if content_range.length and content_range.length < content_range.stop:
             raise BadRequest('Total length is smaller than range end')
     try:
         with log_file:
             if content_range:
                 if content_range.length: # length may be '*' meaning unspecified
                     log_file.truncate(content_range.length)
                 log_file.update_chunk(req.data, content_range.start)
             else:
                 # no Content-Range, therefore the request is the whole file
                 log_file.truncate(req.content_length)
                 log_file.update_chunk(req.data, 0)
     # XXX need to find a less fragile way to do this
     except xmlrpclib.Fault, fault:
         if 'Cannot register file for finished ' in fault.faultString:
             return Response(status=409, response=fault.faultString,
                     content_type='text/plain')
         elif 'Too many ' in fault.faultString:
             return Response(status=403, response=fault.faultString,
                     content_type='text/plain')
         else:
             raise
Example #3
File: proxy.py  Project: tcler/beaker
 def _put_log(self, log_file, req):
     if req.content_length is None:
         raise LengthRequired()
     content_range = parse_content_range_header(
         req.headers.get('Content-Range'))
     if content_range:
         # a few sanity checks
         if req.content_length != (content_range.stop -
                                   content_range.start):
             raise BadRequest('Content length does not match range length')
         if content_range.length and content_range.length < content_range.stop:
             raise BadRequest('Total length is smaller than range end')
     try:
         with log_file:
             if content_range:
                 if content_range.length:  # length may be '*' meaning unspecified
                     log_file.truncate(content_range.length)
                 log_file.update_chunk(req.data, content_range.start)
             else:
                 # no Content-Range, therefore the request is the whole file
                 log_file.truncate(req.content_length)
                 log_file.update_chunk(req.data, 0)
     # XXX need to find a less fragile way to do this
     except xmlrpclib.Fault, fault:
         if 'Cannot register file for finished ' in fault.faultString:
             return Response(status=409,
                             response=fault.faultString,
                             content_type='text/plain')
         elif 'Too many ' in fault.faultString:
             return Response(status=403,
                             response=fault.faultString,
                             content_type='text/plain')
         else:
             raise
Example #4
    def write_share_data(self, request, authorization, storage_index, share_number):
        """Write data to an in-progress immutable upload."""
        content_range = parse_content_range_header(request.getHeader("content-range"))
        if content_range is None or content_range.units != "bytes":
            request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE)
            return b""

        offset = content_range.start

        # TODO limit memory usage
        # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
        data = request.content.read(content_range.stop - content_range.start + 1)
        bucket = self._uploads.get_write_bucket(
            storage_index, share_number, authorization[Secrets.UPLOAD]
        )

        try:
            finished = bucket.write(offset, data)
        except ConflictingWriteError:
            request.setResponseCode(http.CONFLICT)
            return b""

        if finished:
            bucket.close()
            request.setResponseCode(http.CREATED)
        else:
            request.setResponseCode(http.OK)

        required = []
        for start, end, _ in bucket.required_ranges().ranges():
            required.append({"begin": start, "end": end})
        return self._send_encoded(request, {"required": required})
Example #5
    def put(self, session, restype, attachment_id):
        image_store_pymongo, _, attachment = session._fetch_attachment(restype, attachment_id)

        # only accept a single file from a form
        if len(request.files.items(multi=True)) != 1:
            abort(400, details='Exactly one file must be provided.')
        uploaded_file = request.files.itervalues().next()

        content_range = parse_content_range_header(request.headers.get('Content-Range'))
        if not content_range:
            abort(400, details='A PUT request to modify an attachment must include a Content-Range header.')
        if content_range.units != 'bytes':
            abort(400, details='Only a range-unit of "bytes" may be used in a Content-Range header.')
        if content_range.start is None:
            abort(400, details='The content\'s start and end positions must be specified in the Content-Range header.')
        # 'parse_content_range_header' guarantees that 'content_range.stop' must
        #   also be specified if 'start' is
        if content_range.length is None:
            # TODO: getting rid of this restriction would be nice, but we'd need
            #   a way to know when the final chunk was uploaded
            abort(400, details='The content\'s total length must be specified in the Content-Range header.')

        if content_range.start % attachment.chunkSize != 0:
            abort(400, details='The content\'s start location must be a multiple of the content\'s chunk size.')

        content_chunk_size = content_range.stop - content_range.start
        if (content_chunk_size != attachment.chunkSize) and (content_range.stop != content_range.length):
            # only the end chunk can be shorter
            abort(400, details='Upload content chunk size does not match existing GridFS chunk size.')

        chunk_num = (content_range.start / attachment.chunkSize)
        image_store_pymongo['attachments.chunks'].insert({
            'files_id': attachment._id,
            'n': chunk_num,
            'data': Binary(uploaded_file.read()),
        })

        # chunks may be sent out of order, so the only way to determine if all
        #   chunks were received is to count
        expected_chunks = int(math.ceil(float(content_range.length) / float(attachment.chunkSize)))
        received_chunks = image_store_pymongo['attachments.chunks'].find({'files_id': attachment._id}).count()
        if expected_chunks == received_chunks:
            # update the attachment metadata
            md5 = image_store_pymongo.command('filemd5', attachment._id, root='attachments')['md5']
            image_store_pymongo['attachments.files'].update(
                {'_id': attachment._id},
                {'$set': {
                    'length': content_range.length,
                    'md5': md5,
                    'uploadDate': datetime.datetime.utcnow()
                    }}
            )

        return None, 204  # No Content
Example #6
def file_uploader_ui():
    package_id = request.form['package_id']
    package_show = toolkit.get_action('package_show')
    # this ensures current user is authorized to view the package
    package = package_show(data_dict={'name_or_id': package_id})
    package_id = package['id']
    assert package

    file_storage = request.files['files[]'] # type: FileStorage
    file_range = parse_content_range_header(request.headers.get('Content-Range'))

    if file_range:
        log.debug("File Uploader Received File: %s [%d / %d]",file_storage.filename, file_range.stop, file_range.length)
    else:
        log.debug("File Uploader Received File: %s",file_storage.filename)

    storage_path = os.path.join(
        toolkit.config.get('ckan.storage_path'),
        toolkit.config.get('ckanext.file_uploader_ui_path', 'file_uploader_ui'),
        package_id)
    # Keep these logs appearing in production for the Jan 2020 West Africa meet

    try:
        os.makedirs(storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise

    file_path = os.path.join(storage_path, file_storage.filename)

    try:

        if 0 and os.path.exists(file_path) and file_range.start == 0:
            # Abort if file exists already
            return toolkit.abort(400, 'File with that name already in progress')
        elif file_range is None or file_range.start == 0:
            log.debug("Bulk uploading to temporary file %s",file_path)
            with open(file_path, 'wb') as f:
                f.write(file_storage.stream.read())
        else:
            with open(file_path, 'ab') as f:
                f.seek(file_range.start)
                f.write(file_storage.stream.read())

    except OSError:
        # log.exception will include the traceback so we can see what's wrong
        log.exception('Failed to write content to file %s',file_path)
        return toolkit.abort(500, 'File upload failed')

    return jsonify({'files': [{'name': file_storage.filename, 'size':os.path.getsize(file_path)}]})
Example #7
    def test_content_range_parsing():
        rv = http.parse_content_range_header('bytes 0-98/*')
        assert rv.units == 'bytes'
        assert rv.start == 0
        assert rv.stop == 99
        assert rv.length is None
        assert rv.to_header() == 'bytes 0-98/*'

        rv = http.parse_content_range_header('bytes 0-98/*asdfsa')
        assert rv is None

        rv = http.parse_content_range_header('bytes 0-99/100')
        assert rv.to_header() == 'bytes 0-99/100'
        rv.start = None
        rv.stop = None
        assert rv.units == 'bytes'
        assert rv.to_header() == 'bytes */100'

        rv = http.parse_content_range_header('bytes */100')
        assert rv.start is None
        assert rv.stop is None
        assert rv.length == 100
        assert rv.units == 'bytes'
Example #8
    def test_content_range_parsing():
        rv = http.parse_content_range_header('bytes 0-98/*')
        assert rv.units == 'bytes'
        assert rv.start == 0
        assert rv.stop == 99
        assert rv.length is None
        assert rv.to_header() == 'bytes 0-98/*'

        rv = http.parse_content_range_header('bytes 0-98/*asdfsa')
        assert rv is None

        rv = http.parse_content_range_header('bytes 0-99/100')
        assert rv.to_header() == 'bytes 0-99/100'
        rv.start = None
        rv.stop = None
        assert rv.units == 'bytes'
        assert rv.to_header() == 'bytes */100'

        rv = http.parse_content_range_header('bytes */100')
        assert rv.start is None
        assert rv.stop is None
        assert rv.length == 100
        assert rv.units == 'bytes'
Example #9
    def test_content_range_parsing(self):
        rv = http.parse_content_range_header("bytes 0-98/*")
        assert rv.units == "bytes"
        assert rv.start == 0
        assert rv.stop == 99
        assert rv.length is None
        assert rv.to_header() == "bytes 0-98/*"

        rv = http.parse_content_range_header("bytes 0-98/*asdfsa")
        assert rv is None

        rv = http.parse_content_range_header("bytes 0-99/100")
        assert rv.to_header() == "bytes 0-99/100"
        rv.start = None
        rv.stop = None
        assert rv.units == "bytes"
        assert rv.to_header() == "bytes */100"

        rv = http.parse_content_range_header("bytes */100")
        assert rv.start is None
        assert rv.stop is None
        assert rv.length == 100
        assert rv.units == "bytes"
Example #10
    def test_content_range_parsing():
        rv = http.parse_content_range_header("bytes 0-98/*")
        assert rv.units == "bytes"
        assert rv.start == 0
        assert rv.stop == 99
        assert rv.length is None
        assert rv.to_header() == "bytes 0-98/*"

        rv = http.parse_content_range_header("bytes 0-98/*asdfsa")
        assert rv is None

        rv = http.parse_content_range_header("bytes 0-99/100")
        assert rv.to_header() == "bytes 0-99/100"
        rv.start = None
        rv.stop = None
        assert rv.units == "bytes"
        assert rv.to_header() == "bytes */100"

        rv = http.parse_content_range_header("bytes */100")
        assert rv.start is None
        assert rv.stop is None
        assert rv.length == 100
        assert rv.units == "bytes"
Example #11
    def content_range(self) -> ContentRange:
        """The ``Content-Range`` header as a
        :class:`~werkzeug.datastructures.ContentRange` object. Available
        even if the header is not set.

        .. versionadded:: 0.7
        """
        def on_update(rng: ContentRange) -> None:
            if not rng:
                del self.headers["content-range"]
            else:
                self.headers["Content-Range"] = rng.to_header()

        rv = parse_content_range_header(self.headers.get("content-range"),
                                        on_update)
        # always provide a content range object to make the descriptor
        # more user friendly.  It provides an unset() method that can be
        # used to remove the header quickly.
        if rv is None:
            rv = ContentRange(None, None, None, on_update=on_update)
        return rv
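
A short usage sketch of the descriptor above, assuming a standard werkzeug Response object (header strings follow ContentRange.to_header(), where stop is exclusive):

from werkzeug.wrappers import Response

resp = Response()
# The property returns a ContentRange even when the header is absent, so it
# can be filled in place; set() fires on_update, which writes the header.
resp.content_range.set(0, 500, 1000)
assert resp.headers["Content-Range"] == "bytes 0-499/1000"
# unset() empties it again, and on_update then removes the header.
resp.content_range.unset()
assert "Content-Range" not in resp.headers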
Example #12
    def parse_content_range(
        range_header: Optional[str],
    ) -> Tuple[Optional[int], Optional[int], Optional[int]]:

        if range_header is None:
            return None, None, None

        content_range = parse_content_range_header(range_header)

        if content_range is None:
            log.error("Unable to parse Content-Range: {}", range_header)
            tokens = range_header.split("/")

            if len(tokens) != 2:
                log.error("Invalid Content-Range: {}", range_header)
                return None, None, None

            if not tokens[1].isnumeric():
                log.error("Invalid Content-Range: {}", range_header)
                return None, None, None

            total_length = int(tokens[1])
            start = 0
            stop = total_length

            return total_length, start, stop

        total_length = int(content_range.length)
        # e.g. 'bytes */35738983'
        if content_range.start is None:
            start = 0
        else:
            start = int(content_range.start)

        if content_range.stop is None:
            stop = total_length
        else:
            stop = int(content_range.stop)

        return total_length, start, stop
Example #13
    def parse_content_range(
        range_header: Optional[str],
    ) -> Tuple[Optional[int], Optional[int], Optional[int]]:

        if range_header is None:
            return None, None, None

        content_range = parse_content_range_header(range_header)

        if content_range is None:
            log.error("Unable to parse Content-Range: {}", range_header)
            tokens = range_header.split("/")

            if len(tokens) != 2:
                log.error("Invalid Content-Range: {}", range_header)
                return None, None, None

            if not tokens[1].isnumeric():
                log.error("Invalid Content-Range: {}", range_header)
                return None, None, None

            # A pattern like */len is expected;
            # in that case start == 0 and stop == len are returned
            tot_len = int(tokens[1])
            return tot_len, 0, tot_len

        total_length = content_range.length
        # e.g. 'bytes */35738983'
        if content_range.start is None:
            start = 0
        else:
            start = content_range.start

        if content_range.stop is None:
            stop = total_length
        else:
            stop = content_range.stop

        return total_length, start, stop
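
A quick trace of the helper above, called as a plain function with illustrative values: the first header parses normally, the second is the unit-less form that parse_content_range_header rejects and the manual fallback handles, and the third is unparseable by either path.

assert parse_content_range("bytes 0-999/35738983") == (35738983, 0, 1000)
assert parse_content_range("*/35738983") == (35738983, 0, 35738983)
assert parse_content_range("not-a-range") == (None, None, None)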
Example #14
    def content_range(self) -> ContentRange:
        def on_update(cache_range: ContentRange) -> None:
            self.content_range = cache_range

        return parse_content_range_header(self.headers.get("Content-Range"),
                                          on_update)
Example #15
    def chunk_upload(self, upload_dir, filename, chunk_size=None):
        filename = secure_filename(filename)

        try:
            range_header = request.headers.get("Content-Range")
            # content_length = request.headers.get("Content-Length")
            content_range = parse_content_range_header(range_header)

            if content_range is None:
                log.error("Unable to parse Content-Range: {}", range_header)
                completed = True
                start = 0
                total_length = int(range_header.split("/")[1])
                stop = int(total_length)
            else:
                # log.warning(content_range)
                start = int(content_range.start)
                stop = int(content_range.stop)
                total_length = int(content_range.length)
                # log.critical(content_range.start)
                # log.critical(content_range.stop)
                # log.critical(content_range.length)
                # log.critical(content_range.units)
                completed = (stop >= total_length)
        except BaseException as e:
            log.error("Unable to parse Content-Range: {}", range_header)
            log.error(str(e))
            completed = False
            return completed, self.force_response("Invalid request")

        # Default chunk size, put this somewhere
        if chunk_size is None:
            chunk_size = 1048576

        file_path = os.path.join(upload_dir, filename)
        with open(file_path, "ab") as f:
            while True:
                chunk = request.stream.read(chunk_size)
                if not chunk:
                    break
                f.seek(start)
                f.write(chunk)

        if completed:

            # Extra info
            ftype = None
            fcharset = None
            try:
                # Check the type
                from plumbum.cmd import file

                out = file["-ib", file_path]()
                tmp = out.split(';')
                ftype = tmp[0].strip()
                fcharset = tmp[1].split('=')[1].strip()
            except Exception:
                log.warning("Unknown type for '{}'", file_path)

            return completed, self.force_response(
                {
                    'filename': filename,
                    'meta': {
                        'type': ftype,
                        'charset': fcharset
                    }
                },
                code=200)

        return completed, self.force_response(
            "partial",
            headers={
                "Access-Control-Expose-Headers": "Range",
                "Range": "0-{}".format(stop - 1)
            },
            code=206)
Example #16
    def post(self, session, restype="attachments"):
        if restype == "attachments":

            if len(request.files.items(multi=True)) != 1:
                abort(400, details='Exactly one file must be provided.')
            uploaded_file = request.files.itervalues().next()

            gridfs_args = dict()

            # file name
            if not uploaded_file.filename:
                abort(400, details='The file must contain a filename.')
            # TODO: filter the filename to remove special characters and ensure length < 255
            gridfs_args['filename'] = uploaded_file.filename

            # chunked uploads
            content_range = parse_content_range_header(request.headers.get('Content-Range'))
            if content_range and (content_range.stop != content_range.length):
                if content_range.start != 0:
                    abort(400, details='A POST with partial content must not contain a fragment past the start of the entity.')
                if content_range.units != 'bytes':
                    abort(400, details='Only a range-unit of "bytes" may be used in a Content-Range header.')
                content_chunk_size = content_range.stop - content_range.start
                gridfs_args['chunkSize'] = content_chunk_size

            # content type
            # TODO: get the content type via libmagic, so a client can't falsify it
            #   via headers or filename extension
            # TODO: reject dangerous file types, like .exe or .html
            # first, try the client's headers for content type
            if uploaded_file.mimetype and uploaded_file.mimetype != 'application/octet-stream':
                # "mimetype" doesn't include charset options
                gridfs_args['contentType'] = uploaded_file.mimetype
            else:
                # if the headers are non-specific, try the filename extension
                extension_content_type = mimetypes.guess_type(uploaded_file.filename, strict=False)[0]
                if extension_content_type:
                    gridfs_args['contentType'] = extension_content_type
            # if getting the content type failed, leave "gridfs_args['contentType']" unset

            # save into GridFS
            image_store = session.collection.image_store
            attachments_fs = gridfs.GridFS(image_store.to_pymongo(raw_object=True), 'attachments')
            attachment = attachments_fs.new_file(**gridfs_args)
            try:
                # this will stream the IO, instead of loading it all into memory
                uploaded_file.save(attachment)
            finally:
                attachment.close()
                uploaded_file.close()

            # add to session
            attachments_ref = models.RefItem(ref=attachment._id, db=image_store.id)
            session.attachments.append(attachments_ref)
            session.save()

            # return response
            new_location = url_for('.session_attachment_item', session=session,
                                   restype=restype, attachment_id=attachment._id)
            return (None,  # TODO: return body with metadata?
                    201,  # Created
                    {'Location': new_location})

        elif restype == "imagefiles":
            # Need to create a location in sessions with current image store as the default image store
            # add to session
            ref_id = bson.ObjectId()
            return {"id": str(ref_id), "restype": "images"}, 201

        else:
            # Should never reach here
            return {"Error": "Unknown restype: " + restype}, 404