def TestOneInput(data):
    """Atheris fuzz entry point: feed random unicode at werkzeug's HTTP header parsers."""
    fdp = atheris.FuzzedDataProvider(data)
    # The provider consumes its input sequentially, so the order of these
    # calls is significant and must stay fixed.
    for parse in (
        whttp.parse_content_range_header,
        whttp.parse_range_header,
        whttp.parse_set_header,
        whttp.parse_etags,
        whttp.parse_if_range_header,
        whttp.parse_dict_header,
    ):
        parse(fdp.ConsumeUnicode(100))
def _put_log(self, log_file, req):
    """Store an uploaded log (or log chunk) from an HTTP PUT body into *log_file*.

    An optional Content-Range header lets clients upload the log in pieces;
    without one, the request body replaces the whole file.
    NOTE: Python 2 code -- uses the old ``except Type, name`` syntax below.
    """
    # Content-Length is mandatory: we cannot validate the write without it.
    if req.content_length is None:
        raise LengthRequired()
    content_range = parse_content_range_header(req.headers.get('Content-Range'))
    if content_range:
        # a few sanity checks
        # werkzeug's ContentRange.stop is exclusive, so stop - start is the
        # exact number of bytes the client claims to be sending.
        if req.content_length != (content_range.stop - content_range.start):
            raise BadRequest('Content length does not match range length')
        if content_range.length and content_range.length < content_range.stop:
            raise BadRequest('Total length is smaller than range end')
    try:
        with log_file:
            if content_range:
                if content_range.length:  # length may be '*' meaning unspecified
                    log_file.truncate(content_range.length)
                log_file.update_chunk(req.data, content_range.start)
            else:
                # no Content-Range, therefore the request is the whole file
                log_file.truncate(req.content_length)
                log_file.update_chunk(req.data, 0)
    # XXX need to find a less fragile way to do this
    except xmlrpclib.Fault, fault:
        # Map known XML-RPC fault strings onto appropriate HTTP statuses.
        if 'Cannot register file for finished ' in fault.faultString:
            return Response(status=409, response=fault.faultString,
                            content_type='text/plain')
        elif 'Too many ' in fault.faultString:
            return Response(status=403, response=fault.faultString,
                            content_type='text/plain')
        else:
            # Unrecognised fault: let it propagate to the caller.
            raise
def _put_log(self, log_file, req):
    """Handle an HTTP PUT of log data, writing it (whole or ranged) into *log_file*.

    When a Content-Range header is present only the given slice is updated;
    otherwise the body is treated as the complete file.
    NOTE: Python 2 -- the ``except xmlrpclib.Fault, fault`` syntax below is py2-only.
    """
    # Refuse requests that do not declare their body size.
    if req.content_length is None:
        raise LengthRequired()
    content_range = parse_content_range_header(
        req.headers.get('Content-Range'))
    if content_range:
        # a few sanity checks
        # ContentRange.stop is exclusive, so (stop - start) must equal the
        # declared body size.
        if req.content_length != (content_range.stop - content_range.start):
            raise BadRequest('Content length does not match range length')
        if content_range.length and content_range.length < content_range.stop:
            raise BadRequest('Total length is smaller than range end')
    try:
        with log_file:
            if content_range:
                if content_range.length:  # length may be '*' meaning unspecified
                    log_file.truncate(content_range.length)
                log_file.update_chunk(req.data, content_range.start)
            else:
                # no Content-Range, therefore the request is the whole file
                log_file.truncate(req.content_length)
                log_file.update_chunk(req.data, 0)
    # XXX need to find a less fragile way to do this
    except xmlrpclib.Fault, fault:
        # Translate well-known backend fault messages into HTTP errors.
        if 'Cannot register file for finished ' in fault.faultString:
            return Response(status=409, response=fault.faultString,
                            content_type='text/plain')
        elif 'Too many ' in fault.faultString:
            return Response(status=403, response=fault.faultString,
                            content_type='text/plain')
        else:
            # Anything else is unexpected -- re-raise.
            raise
def write_share_data(self, request, authorization, storage_index, share_number):
    """Write data to an in-progress immutable upload.

    The client states which byte range it is sending via a mandatory
    ``Content-Range: bytes ...`` header and carries exactly that range in
    the body.  Responds 201 once the bucket is complete, otherwise 200 with
    the list of ranges still required.
    """
    content_range = parse_content_range_header(request.getHeader("content-range"))
    if content_range is None or content_range.units != "bytes":
        request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE)
        return b""

    offset = content_range.start

    # TODO limit memory usage
    # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
    # werkzeug's ContentRange.stop is exclusive, so the advertised range
    # covers exactly (stop - start) bytes.  The previous "+ 1" here read one
    # byte too many, which could append a stray byte to the share whenever
    # the request body was longer than the advertised range.
    data = request.content.read(content_range.stop - content_range.start)
    bucket = self._uploads.get_write_bucket(
        storage_index, share_number, authorization[Secrets.UPLOAD]
    )

    try:
        finished = bucket.write(offset, data)
    except ConflictingWriteError:
        # An overlapping write disagreed with previously uploaded bytes.
        request.setResponseCode(http.CONFLICT)
        return b""

    if finished:
        bucket.close()
        request.setResponseCode(http.CREATED)
    else:
        request.setResponseCode(http.OK)

    required = []
    for start, end, _ in bucket.required_ranges().ranges():
        required.append({"begin": start, "end": end})
    return self._send_encoded(request, {"required": required})
def put(self, session, restype, attachment_id):
    """Upload one chunk of an existing attachment via HTTP PUT.

    Python 2 code (``itervalues().next()``, integer ``/`` division).
    The client must send a Content-Range header whose start is aligned to
    the GridFS chunk size; chunks may arrive in any order, so completion is
    detected by counting stored chunks against the expected total.
    """
    image_store_pymongo, _, attachment = session._fetch_attachment(restype, attachment_id)
    # only accept a single file from a form
    if len(request.files.items(multi=True)) != 1:
        abort(400, details='Exactly one file must be provided.')
    uploaded_file = request.files.itervalues().next()
    content_range = parse_content_range_header(request.headers.get('Content-Range'))
    if not content_range:
        abort(400, details='A PUT request to modify an attachment must include a Content-Range header.')
    if content_range.units != 'bytes':
        abort(400, details='Only a range-unit of "bytes" may be used in a Content-Range header.')
    if content_range.start is None:
        abort(400, details='The content\'s start and end positions must be specified in the Content-Range header.')
    # 'parse_content_range_header' guarantees that 'content_range.stop' must
    # also be specified if 'start' is
    if content_range.length is None:
        # TODO: getting rid of this restriction would be nice, but we'd need
        # a way to know when the final chunk was uploaded
        abort(400, details='The content\'s total length must be specified in the Content-Range header.')
    if content_range.start % attachment.chunkSize != 0:
        abort(400, details='The content\'s start location must be a multiple of the content\'s chunk size.')
    # werkzeug's stop is exclusive, so this is the size of the uploaded piece.
    content_chunk_size = content_range.stop - content_range.start
    if (content_chunk_size != attachment.chunkSize) and (content_range.stop != content_range.length):
        # only the end chunk can be shorter
        abort(400, details='Upload content chunk size does not match existing GridFS chunk size.')
    # Python 2 integer division; start is chunk-aligned, so this is exact.
    chunk_num = (content_range.start / attachment.chunkSize)
    image_store_pymongo['attachments.chunks'].insert({
        'files_id': attachment._id,
        'n': chunk_num,
        'data': Binary(uploaded_file.read()),
    })
    # chunks may be sent out of order, so the only way to determine if all
    # chunks were received is to count
    expected_chunks = int(math.ceil(float(content_range.length) / float(attachment.chunkSize)))
    received_chunks = image_store_pymongo['attachments.chunks'].find({'files_id': attachment._id}).count()
    if expected_chunks == received_chunks:
        # update the attachment metadata
        md5 = image_store_pymongo.command('filemd5', attachment._id, root='attachments')['md5']
        image_store_pymongo['attachments.files'].update(
            {'_id': attachment._id},
            {'$set': {
                'length': content_range.length,
                'md5': md5,
                'uploadDate': datetime.datetime.utcnow()
            }}
        )
    # Every successful chunk upload (final or not) acknowledges with 204.
    return None, 204  # No Content
def file_uploader_ui():
    """Receive a (possibly chunked) file upload for a CKAN package.

    Chunking is signalled by a Content-Range header: a chunk starting at 0
    (or no header at all) creates/overwrites the file, later chunks are
    appended.  Returns a jQuery-File-Upload style JSON payload.
    """
    package_id = request.form['package_id']
    package_show = toolkit.get_action('package_show')
    # this ensures current user is authorized to view the package
    package = package_show(data_dict={'name_or_id': package_id})
    package_id = package['id']
    assert package
    file_storage = request.files['files[]']  # type: FileStorage
    file_range = parse_content_range_header(request.headers.get('Content-Range'))
    if file_range:
        log.debug("File Uploader Received File: %s [%d / %d]",
                  file_storage.filename, file_range.stop, file_range.length)
    else:
        log.debug("File Uploader Received File: %s", file_storage.filename)
    storage_path = os.path.join(
        toolkit.config.get('ckan.storage_path'),
        toolkit.config.get('ckanext.file_uploader_ui_path', 'file_uploader_ui'),
        package_id)
    # Keep these logs appearing in production for the Jan 2020 West Africa meet
    try:
        os.makedirs(storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise
    file_path = os.path.join(storage_path, file_storage.filename)
    try:
        # A permanently-disabled "if 0 and ..." duplicate-file check used to
        # sit here; it was dead code (and would have dereferenced file_range
        # when it could be None) so it has been removed.
        if file_range is None or file_range.start == 0:
            # First (or only) chunk: write the file from scratch.
            log.debug("Bulk uploading to temporary file %s", file_path)
            with open(file_path, 'wb') as f:
                f.write(file_storage.stream.read())
        else:
            # Later chunk: append.  NOTE(review): in append mode the seek()
            # below has no effect on where data lands -- appends always write
            # at EOF -- so this is only correct when chunks arrive strictly
            # in order; out-of-order chunks would corrupt the file.
            with open(file_path, 'ab') as f:
                f.seek(file_range.start)
                f.write(file_storage.stream.read())
    except OSError:
        # log.exception will include the traceback so we can see what's wrong
        log.exception('Failed to write content to file %s', file_path)
        return toolkit.abort(500, 'File upload failed')
    return jsonify({'files': [{'name': file_storage.filename,
                               'size': os.path.getsize(file_path)}]})
def test_content_range_parsing():
    """Exercise werkzeug's Content-Range parsing and re-serialisation."""
    parse = http.parse_content_range_header

    # An explicit range with an unknown ('*') total length.
    rv = parse('bytes 0-98/*')
    assert rv.units == 'bytes'
    assert rv.start == 0
    assert rv.stop == 99  # stop is exclusive: 0-98 covers 99 bytes
    assert rv.length is None
    assert rv.to_header() == 'bytes 0-98/*'

    # Trailing junk after the total length invalidates the whole header.
    rv = parse('bytes 0-98/*asdfsa')
    assert rv is None

    # Clearing start/stop re-renders as an unsatisfied-range header.
    rv = parse('bytes 0-99/100')
    assert rv.to_header() == 'bytes 0-99/100'
    rv.start = None
    rv.stop = None
    assert rv.units == 'bytes'
    assert rv.to_header() == 'bytes */100'

    # 'bytes */100': only the total length is known.
    rv = parse('bytes */100')
    assert rv.start is None
    assert rv.stop is None
    assert rv.length == 100
    assert rv.units == 'bytes'
def test_content_range_parsing(self):
    """Round-trip Content-Range headers through parse + to_header."""
    # Explicit range with an unknown ('*') total length.
    cr = http.parse_content_range_header("bytes 0-98/*")
    assert cr.units == "bytes"
    assert cr.start == 0
    # The inclusive end "98" becomes an exclusive stop of 99.
    assert cr.stop == 99
    assert cr.length is None
    assert cr.to_header() == "bytes 0-98/*"

    # Junk appended to the length makes the header unparseable.
    cr = http.parse_content_range_header("bytes 0-98/*asdfsa")
    assert cr is None

    # Dropping start and stop re-renders as "bytes */<length>".
    cr = http.parse_content_range_header("bytes 0-99/100")
    assert cr.to_header() == "bytes 0-99/100"
    cr.start = cr.stop = None
    assert cr.units == "bytes"
    assert cr.to_header() == "bytes */100"

    # "bytes */100": only the total length is known.
    cr = http.parse_content_range_header("bytes */100")
    assert cr.start is None
    assert cr.stop is None
    assert cr.length == 100
    assert cr.units == "bytes"
def test_content_range_parsing():
    """Content-Range headers parse correctly and serialise back verbatim."""
    # Case 1: explicit range, unspecified total ('*').
    header = http.parse_content_range_header("bytes 0-98/*")
    assert header.units == "bytes"
    assert header.start == 0
    assert header.stop == 99  # exclusive stop
    assert header.length is None
    assert header.to_header() == "bytes 0-98/*"

    # Case 2: garbage suffix -> the whole header is rejected.
    header = http.parse_content_range_header("bytes 0-98/*asdfsa")
    assert header is None

    # Case 3: mutating a parsed range updates its serialised form.
    header = http.parse_content_range_header("bytes 0-99/100")
    assert header.to_header() == "bytes 0-99/100"
    header.start = None
    header.stop = None
    assert header.units == "bytes"
    assert header.to_header() == "bytes */100"

    # Case 4: only the total length is present.
    header = http.parse_content_range_header("bytes */100")
    assert header.start is None
    assert header.stop is None
    assert header.length == 100
    assert header.units == "bytes"
def content_range(self) -> ContentRange:
    """The ``Content-Range`` header as a
    :class:`~werkzeug.datastructures.ContentRange` object.

    Available even if the header is not set.

    .. versionadded:: 0.7
    """

    def on_update(rng: ContentRange) -> None:
        # Mutating the returned object writes straight back to the headers;
        # an emptied range removes the header entirely.
        if rng:
            self.headers["Content-Range"] = rng.to_header()
        else:
            del self.headers["content-range"]

    parsed = parse_content_range_header(self.headers.get("content-range"), on_update)
    if parsed is not None:
        return parsed
    # Always hand back a ContentRange so callers can use unset() to remove
    # the header quickly without first checking for None.
    return ContentRange(None, None, None, on_update=on_update)
def parse_content_range(
    range_header: Optional[str],
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
    """Parse a Content-Range header into ``(total_length, start, stop)``.

    Falls back to hand-parsing a ``*/<len>`` shaped header when werkzeug
    rejects it; returns ``(None, None, None)`` when nothing can be extracted.
    """
    if range_header is None:
        return None, None, None

    content_range = parse_content_range_header(range_header)
    if content_range is None:
        log.error("Unable to parse Content-Range: {}", range_header)
        tokens = range_header.split("/")
        if len(tokens) != 2:
            log.error("Invalid Content-Range: {}", range_header)
            return None, None, None
        if not tokens[1].isnumeric():
            log.error("Invalid Content-Range: {}", range_header)
            return None, None, None
        # A '*/<len>' pattern: treat it as covering the whole entity.
        total_length = int(tokens[1])
        return total_length, 0, total_length

    # length may be None for headers like 'bytes 0-98/*'; the previous
    # int(content_range.length) raised TypeError in that case.
    total_length = content_range.length

    # es: 'bytes */35738983' -> start/stop are None, default to the full range
    start = 0 if content_range.start is None else content_range.start
    stop = total_length if content_range.stop is None else content_range.stop

    return total_length, start, stop
def parse_content_range(
    range_header: Optional[str],
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
    """Return ``(total_length, start, stop)`` extracted from a Content-Range header."""
    if range_header is None:
        return None, None, None

    parsed = parse_content_range_header(range_header)

    if parsed is None:
        log.error("Unable to parse Content-Range: {}", range_header)
        pieces = range_header.split("/")
        # Accept only a '*/<len>' shaped fallback with a numeric length.
        if len(pieces) != 2:
            log.error("Invalid Content-Range: {}", range_header)
            return None, None, None
        if not pieces[1].isnumeric():
            log.error("Invalid Content-Range: {}", range_header)
            return None, None, None
        # A pattern like */len is expected
        # => is returned start == 0 and stop == len
        tot_len = int(pieces[1])
        return tot_len, 0, tot_len

    total_length = parsed.length
    # es: 'bytes */35738983' -> missing start/stop default to the full range
    start = 0 if parsed.start is None else parsed.start
    stop = total_length if parsed.stop is None else parsed.stop
    return total_length, start, stop
def content_range(self) -> ContentRange:
    """The parsed ``Content-Range`` header.

    Mutations of the returned object are pushed back through
    ``self.content_range`` (presumably a property setter elsewhere --
    confirm against the enclosing class).
    """

    def on_update(value: ContentRange) -> None:
        # Re-assigning keeps the stored header in sync with the object.
        self.content_range = value

    header = self.headers.get("Content-Range")
    return parse_content_range_header(header, on_update)
def chunk_upload(self, upload_dir, filename, chunk_size=None):
    """Receive one chunk of a file upload and write it under *upload_dir*.

    Returns a ``(completed, response)`` tuple: a 206 response with a Range
    header while chunks are still outstanding, or a 200 response with file
    metadata once the range end reaches the declared total length.
    """
    filename = secure_filename(filename)
    try:
        range_header = request.headers.get("Content-Range")
        # content_length = request.headers.get("Content-Length")
        content_range = parse_content_range_header(range_header)
        if content_range is None:
            # Unparseable header: fall back to reading the total from the
            # raw '<unit> */<total>' text and treat the upload as complete.
            log.error("Unable to parse Content-Range: {}", range_header)
            completed = True
            start = 0
            total_length = int(range_header.split("/")[1])
            stop = int(total_length)
        else:
            # log.warning(content_range)
            start = int(content_range.start)
            stop = int(content_range.stop)
            total_length = int(content_range.length)
            # log.critical(content_range.start)
            # log.critical(content_range.stop)
            # log.critical(content_range.length)
            # log.critical(content_range.units)
            # stop is exclusive, so reaching total_length means final chunk.
            completed = (stop >= total_length)
    except BaseException as e:
        # NOTE(review): BaseException is very broad; in practice this also
        # catches the TypeError raised by int(None) when start/stop/length
        # are absent from the header.
        log.error("Unable to parse Content-Range: {}", range_header)
        log.error(str(e))
        completed = False
        return completed, self.force_response("Invalid request")

    # Default chunk size, put this somewhere
    if chunk_size is None:
        chunk_size = 1048576

    file_path = os.path.join(upload_dir, filename)
    # NOTE(review): the file is opened in append mode, so the seek() below
    # does not affect where data lands (appends always write at EOF); this
    # assumes chunks arrive strictly in order -- confirm.
    with open(file_path, "ab") as f:
        while True:
            chunk = request.stream.read(chunk_size)
            if not chunk:
                break
            f.seek(start)
            f.write(chunk)

    if completed:
        # Extra info
        ftype = None
        fcharset = None
        try:
            # Check the type
            from plumbum.cmd import file
            out = file["-ib", file_path]()
            tmp = out.split(';')
            ftype = tmp[0].strip()
            fcharset = tmp[1].split('=')[1].strip()
        except Exception:
            # Best-effort only: unknown type is not an error for the client.
            log.warning("Unknown type for '{}'", file_path)

        return completed, self.force_response(
            {
                'filename': filename,
                'meta': {
                    'type': ftype,
                    'charset': fcharset
                }
            }, code=200)

    # More chunks expected: tell the client how far we have got.
    return completed, self.force_response(
        "partial",
        headers={
            "Access-Control-Expose-Headers": "Range",
            "Range": "0-{}".format(stop - 1)
        },
        code=206)
def post(self, session, restype="attachments"):
    """Create a new attachment (or image-file reference) on *session* via POST.

    Python 2 code (``itervalues().next()``).  For attachments the uploaded
    form file is streamed into GridFS; a partial-content POST (Content-Range
    whose stop != length) fixes the GridFS chunk size for follow-up PUTs.
    """
    if restype == "attachments":
        # only accept a single file from a form
        if len(request.files.items(multi=True)) != 1:
            abort(400, details='Exactly one file must be provided.')
        uploaded_file = request.files.itervalues().next()
        gridfs_args = dict()
        # file name
        if not uploaded_file.filename:
            abort(400, details='The file must contain a filename.')
        # TODO: filter the filename to remove special characters and ensure length < 255
        gridfs_args['filename'] = uploaded_file.filename
        # chunked uploads
        content_range = parse_content_range_header(request.headers.get('Content-Range'))
        if content_range and (content_range.stop != content_range.length):
            if content_range.start != 0:
                abort(400, details='A POST with partial content must not contain a fragment past the start of the entity.')
            if content_range.units != 'bytes':
                abort(400, details='Only a range-unit of "bytes" may be used in a Content-Range header.')
            # werkzeug's stop is exclusive: the first fragment's size becomes
            # the GridFS chunk size for the remainder of the upload.
            content_chunk_size = content_range.stop - content_range.start
            gridfs_args['chunkSize'] = content_chunk_size
        # content type
        # TODO: get the content type via libmagic, so a client can't falsify it
        # via headers or filename extension
        # TODO: reject dangerous file types, like .exe or .html
        # first, try the client's headers for content type
        if uploaded_file.mimetype and uploaded_file.mimetype != 'application/octet-stream':
            # "mimetype" doesn't include charset options
            gridfs_args['contentType'] = uploaded_file.mimetype
        else:
            # if the headers are non-specific, try the filename extension
            extension_content_type = mimetypes.guess_type(uploaded_file.filename, strict=False)[0]
            if extension_content_type:
                gridfs_args['contentType'] = extension_content_type
            # if getting the content type failed, leave "gridfs_args['contentType']" unset
        # save into GridFS
        image_store = session.collection.image_store
        attachments_fs = gridfs.GridFS(image_store.to_pymongo(raw_object=True), 'attachments')
        attachment = attachments_fs.new_file(**gridfs_args)
        try:
            # this will stream the IO, instead of loading it all into memory
            uploaded_file.save(attachment)
        finally:
            attachment.close()
            uploaded_file.close()
        # add to session
        attachments_ref = models.RefItem(ref=attachment._id, db=image_store.id)
        session.attachments.append(attachments_ref)
        session.save()
        # return response
        new_location = url_for('.session_attachment_item', session=session, restype=restype, attachment_id=attachment._id)
        return (None,  # TODO: return body with metadata?
                201,  # Created
                {'Location': new_location})
    elif restype == "imagefiles":
        # Need to create a location in sessions with current image store as the default image store
        # add to session
        ref_id = bson.ObjectId()
        return {"id": str(ref_id), "restype": "images"}, 201
    else:
        # Should never reach here
        return {"Error": "Unknown restype: " + restype}, 404