def _get_disp_filename(headers: MutableHeaders) -> t.Optional[bytes]: if "content-disposition" in headers: _, options = multipart.parse_options_header( headers["content-disposition"]) if b"filename" in options: return t.cast(bytes, options[b"filename"]) return None
def __init__(
    self, request: Request, body: typing.AsyncGenerator[bytes, None]
) -> None:
    """Keep the request and its body stream and parse the Content-Type header.

    The parsed header value and its options are stored on
    ``self.content_type`` and ``self.params``; ``self.messages`` starts out
    as an empty list.
    """
    self.request = request
    self.body = body
    self.messages = []

    raw_content_type = self.request.headers.get("content-type", None)
    self.content_type, self.params = parse_options_header(raw_content_type)
def callback( # noqa: CCR001 name: str, data: typing.Optional[bytes] = None, start: typing.Optional[int] = None, end: typing.Optional[int] = None, ) -> None: nonlocal header_field, header_value, content_disposition, content_type nonlocal field_name, field_value, mfw if name == "header_field": header_field += data[start:end] # type: ignore[index] elif name == "header_value": header_value += data[start:end] # type: ignore[index] elif name == "header_end": f = header_field.lower() if f == b"content-disposition": content_disposition = header_value elif f == b"content-type": content_type = header_value header_field = b"" header_value = b"" elif name == "headers_finished": disposition, options = parse_options_header(content_disposition) field_name = options[b"name"].decode("utf-8") field_value = b"" if b"filename" in options: mfw = MultipartFileWriter( options[b"filename"].decode("utf-8"), content_type.decode("utf-8"), ) else: mfw = None elif name == "part_data": if mfw: chunk = data[start:end] # type: ignore[index] if mfw.would_roll(len(chunk)): io_pending.append((mfw, chunk)) else: mfw.write(chunk) else: field_value += data[start:end] # type: ignore[index] elif name == "part_end": if mfw: rolled = mfw.would_roll(0) if rolled: io_pending.append((mfw, None)) else: mfw.seek() files.append(( field_name, MultipartFile(mfw.name, mfw.content_type, rolled, mfw.file), )) else: form.append((field_name, field_value.decode("utf-8")))
async def from_http_request(self, request: Request) -> ImageType:
    """Build a PIL image from the body of an HTTP request.

    For ``multipart/form-data`` the first form value is read as the image
    payload; for ``image/*`` (or the configured MIME type) the raw request
    body is used.

    Raises:
        BadInput: If the Content-Type is missing or unsupported, or if a
            multipart request contains no part.
    """
    # ``.get`` rather than indexing: a request without a Content-Type header
    # then falls through to the BadInput below instead of a bare KeyError.
    content_type, _ = parse_options_header(request.headers.get("content-type"))
    mime_type = content_type.decode().lower()

    if mime_type == "multipart/form-data":
        form = await request.form()
        first_part = next(iter(form.values()), None)
        if first_part is None:
            # A bare next() would raise StopIteration, which surfaces as an
            # opaque RuntimeError inside a coroutine.
            raise BadInput(
                f"{self.__class__.__name__} got a `multipart/form-data` "
                "request without any part"
            )
        bytes_ = await first_part.read()
    elif mime_type.startswith("image/") or self._mime_type in (
        mime_type,
        content_type,
    ):
        # The configured MIME type is matched against both the decoded and
        # the raw header value, so it works whether it is str or bytes.
        bytes_ = await request.body()
    else:
        raise BadInput(
            f"{self.__class__.__name__} should get `multipart/form-data`, "
            f"`{self._mime_type}` or `image/*`, got {content_type} instead"
        )
    return PIL.Image.open(io.BytesIO(bytes_))
async def from_http_request(self, request: Request) -> FileLike:
    """Build a ``FileLike`` from the body of an HTTP request.

    ``multipart/form-data`` requests yield the first form value (with its
    filename); ``application/octet-stream`` requests yield the raw body.

    Raises:
        BentoMLException: If the Content-Type is missing or unsupported, or
            if a multipart request contains no part.
    """
    # ``.get`` rather than indexing: a missing header then ends in the
    # BentoMLException below instead of a bare KeyError.
    content_type, _ = parse_options_header(request.headers.get("content-type"))
    # Media types are case-insensitive; decode and normalize once.
    mime_type = content_type.decode("utf-8").lower()

    if mime_type == "multipart/form-data":
        form = await request.form()
        f = next(iter(form.values()), None)
        if f is None:
            # A bare next() would raise StopIteration, which surfaces as an
            # opaque RuntimeError inside a coroutine.
            raise BentoMLException(
                f"{self.__class__.__name__} got a `multipart/form-data`"
                " request without any part"
            )
        content = await f.read()
        return FileLike(bytes_=content, name=f.filename)

    if mime_type == "application/octet-stream":
        body = await request.body()
        return FileLike(bytes_=body)

    raise BentoMLException(
        f"{self.__class__.__name__} should have Content-Type"
        f" b'application/octet-stream' or b'multipart/form-data',"
        f" got {content_type} instead")
async def from_http_request(self, request: Request) -> MultipartIO:
    """Decode each configured input from its part of a multipart request.

    The request is split into one sub-request per part, and every entry of
    ``self._inputs`` decodes the sub-request stored under its own key.

    Raises:
        BentoMLException: If the Content-Type is not ``multipart/form-data``
            or if a part expected by ``self._inputs`` is missing.
    """
    # ``.get`` rather than indexing: a missing header is reported through the
    # BentoMLException below instead of a bare KeyError.
    ctype, _ = parse_options_header(request.headers.get("content-type"))
    if ctype != b"multipart/form-data":
        raise BentoMLException(
            f"{self.__class__.__name__} only accepts `multipart/form-data` as Content-Type header, got {ctype} instead."
        )

    reqs = await populate_multipart_requests(request)

    # Report every absent part at once rather than failing with a KeyError
    # on the first one.
    missing = [k for k in self._inputs if k not in reqs]
    if missing:
        raise BentoMLException(
            f"{self.__class__.__name__} is missing multipart field(s): "
            f"{', '.join(map(str, missing))}"
        )

    res: MultipartIO = dict()
    for k, i in self._inputs.items():
        res[k] = await i.from_http_request(reqs[k])
    return res
async def form(self) -> FormData:
    """Return the request's form data, parsing the body on first use.

    The result is cached on ``self._form``. Bodies that are neither
    ``multipart/form-data`` nor ``application/x-www-form-urlencoded`` yield
    an empty ``FormData``.
    """
    if hasattr(self, "_form"):
        return self._form

    assert (
        parse_options_header is not None
    ), "The `python-multipart` library must be installed to use form parsing."

    media_type, _options = parse_options_header(self.headers.get("Content-Type"))
    if media_type == b"multipart/form-data":
        self._form = await MultiPartParser(self.headers, self.stream()).parse()
    elif media_type == b"application/x-www-form-urlencoded":
        self._form = await FormParser(self.headers, self.stream()).parse()
    else:
        self._form = FormData()
    return self._form
async def populate_multipart_requests(
    request: Request,
) -> t.Dict[str, Request]:
    """Split a multipart request into one ``Request`` per part.

    Each returned request reuses the original ASGI scope, with the part's
    own headers layered over the original ones and the part's payload as its
    body. Parts are keyed by field name; a repeated name keeps the last part.

    Raises:
        BentoMLException: If the request is not ``multipart/form-data`` or
            ``multipart/mixed``, or if its body cannot be parsed.
    """
    content_type_header = request.headers.get("Content-Type")
    content_type, _ = multipart.parse_options_header(content_type_header)
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O`` and this validates client-supplied input.
    if content_type not in (b"multipart/form-data", b"multipart/mixed"):
        raise BentoMLException(
            "Expected `multipart/form-data` or `multipart/mixed` as "
            f"Content-Type header, got {content_type} instead."
        )

    stream = t.cast(t.AsyncGenerator[bytes, None], request.stream())
    multipart_parser = MultiPartParser(request.headers, stream)
    try:
        form = await multipart_parser.parse()
    except multipart.MultipartParseError as exc:
        raise BentoMLException("Invalid multipart requests") from exc

    reqs: t.Dict[str, Request] = {}
    for field_name, headers, data in form:
        scope = dict(request.scope)
        merged_headers = t.cast(
            t.Dict[bytes, bytes], dict(scope.get("headers", []))
        )
        # Part-level headers take precedence over the outer request's.
        merged_headers.update(dict(headers))
        scope["headers"] = list(merged_headers.items())

        req = Request(scope)
        req._body = data
        reqs[field_name] = req
    return reqs
async def form(self) -> FormData:
    """Return the request's form data, parsing the body on first use.

    The result is cached on ``self._form``. A ``MultiPartException`` raised
    while parsing a multipart body is turned into an HTTP 400 when the scope
    contains an ``"app"`` entry and re-raised unchanged otherwise. Any other
    content type yields an empty ``FormData``.
    """
    if hasattr(self, "_form"):
        return self._form

    assert (
        parse_options_header is not None
    ), "The `python-multipart` library must be installed to use form parsing."

    media_type: bytes
    media_type, _ = parse_options_header(self.headers.get("Content-Type"))

    if media_type == b"application/x-www-form-urlencoded":
        self._form = await FormParser(self.headers, self.stream()).parse()
    elif media_type == b"multipart/form-data":
        try:
            self._form = await MultiPartParser(
                self.headers, self.stream()
            ).parse()
        except MultiPartException as exc:
            if "app" in self.scope:
                raise HTTPException(status_code=400, detail=exc.message)
            raise exc
    else:
        self._form = FormData()
    return self._form
async def parse(self) -> FormData:
    """Parse the multipart body from ``self.stream`` into ``FormData``.

    Parts whose Content-Disposition has a ``filename`` option become
    ``UploadFile`` objects; all other parts are decoded to text using the
    charset from the request's Content-Type header (default ``utf-8``).

    Raises:
        KeyError: If the Content-Type header is missing or has no
            ``boundary`` option, or if a part has no ``name`` option.
    """
    # Parse the Content-Type header to get the multipart boundary.
    _, params = parse_options_header(self.headers["Content-Type"])
    charset = params.get(b"charset", "utf-8")
    # Option values parsed from the header are bytes; the default is str.
    if isinstance(charset, bytes):
        charset = charset.decode("latin-1")
    boundary = params[b"boundary"]

    # Callbacks dictionary.
    callbacks = {
        "on_part_begin": self.on_part_begin,
        "on_part_data": self.on_part_data,
        "on_part_end": self.on_part_end,
        "on_header_field": self.on_header_field,
        "on_header_value": self.on_header_value,
        "on_header_end": self.on_header_end,
        "on_headers_finished": self.on_headers_finished,
        "on_end": self.on_end,
    }

    # Create the parser.
    parser = multipart.MultipartParser(boundary, callbacks)
    header_field = b""
    header_value = b""
    content_disposition = None
    content_type = b""
    field_name = ""
    data = b""
    file: typing.Optional[UploadFile] = None

    items: typing.List[typing.Tuple[str, typing.Union[str, UploadFile]]] = []
    item_headers: typing.List[typing.Tuple[bytes, bytes]] = []

    # Feed the parser with data from the request.
    async for chunk in self.stream:
        parser.write(chunk)
        # Snapshot and clear the queued events so the awaits below can run
        # against a stable list.
        messages = list(self.messages)
        self.messages.clear()
        for message_type, message_bytes in messages:
            if message_type == MultiPartMessage.PART_BEGIN:
                content_disposition = None
                content_type = b""
                data = b""
                item_headers = []
            elif message_type == MultiPartMessage.HEADER_FIELD:
                header_field += message_bytes
            elif message_type == MultiPartMessage.HEADER_VALUE:
                header_value += message_bytes
            elif message_type == MultiPartMessage.HEADER_END:
                field = header_field.lower()
                if field == b"content-disposition":
                    content_disposition = header_value
                elif field == b"content-type":
                    content_type = header_value
                item_headers.append((field, header_value))
                header_field = b""
                header_value = b""
            elif message_type == MultiPartMessage.HEADERS_FINISHED:
                _, options = parse_options_header(content_disposition)
                field_name = _user_safe_decode(options[b"name"], charset)
                if b"filename" in options:
                    filename = _user_safe_decode(options[b"filename"], charset)
                    file = UploadFile(
                        filename=filename,
                        content_type=content_type.decode("latin-1"),
                        headers=Headers(raw=item_headers),
                    )
                else:
                    file = None
            elif message_type == MultiPartMessage.PART_DATA:
                if file is None:
                    data += message_bytes
                else:
                    await file.write(message_bytes)
            elif message_type == MultiPartMessage.PART_END:
                if file is None:
                    items.append((field_name, _user_safe_decode(data, charset)))
                else:
                    # Rewind so consumers read the upload from its start.
                    await file.seek(0)
                    items.append((field_name, file))

    parser.finalize()
    return FormData(items)
async def parse(self) -> _ItemsBody:
    """Parse the multipart body into ``(field_name, headers, data)`` items.

    The field name of a part comes from the ``name`` option of its
    Content-Disposition header or from a ``bentoml-payload-field`` header.
    All other part headers are collected, lower-cased, into ``headers``.

    Raises:
        KeyError: If the Content-Type header is missing or a
            Content-Disposition header has no ``name`` option.
        multipart.MultipartParseError: If a part ends its headers without
            providing a field name.
    """
    # Parse the Content-Type header to get the multipart boundary.
    _, params = multipart.parse_options_header(self.headers["Content-Type"])
    params = t.cast(t.Dict[bytes, bytes], params)
    charset = params.get(b"charset", b"utf-8")
    charset = charset.decode("latin-1")
    boundary = params.get(b"boundary")

    # Callbacks dictionary.
    callbacks = {
        "on_part_begin": self.on_part_begin,
        "on_part_data": self.on_part_data,
        "on_part_end": self.on_part_end,
        "on_header_field": self.on_header_field,
        "on_header_value": self.on_header_value,
        "on_header_end": self.on_header_end,
        "on_headers_finished": self.on_headers_finished,
        "on_end": self.on_end,
    }

    # Create the parser.
    parser = multipart.MultipartParser(boundary, callbacks)
    header_field = b""
    header_value = b""
    field_name = ""
    data = b""

    items: _ItemsBody = []
    headers: t.List[t.Tuple[bytes, bytes]] = []

    # Feed the parser with data from the request.
    async for chunk in self.stream:
        parser.write(chunk)
        messages = list(self.messages)
        self.messages.clear()
        for message_type, message_bytes in messages:
            if message_type == MultiPartMessage.PART_BEGIN:
                field_name = ""
                data = b""
                headers = list()
            elif message_type == MultiPartMessage.HEADER_FIELD:  # type: ignore
                header_field += message_bytes
            elif message_type == MultiPartMessage.HEADER_VALUE:  # type: ignore
                header_value += message_bytes
            elif message_type == MultiPartMessage.HEADER_END:  # type: ignore
                field = header_field.lower()
                if field == b"content-disposition":
                    _, options = multipart.parse_options_header(header_value)
                    options = t.cast(t.Dict[bytes, bytes], options)
                    field_name = user_safe_decode(options[b"name"], charset)
                elif field == b"bentoml-payload-field":
                    field_name = user_safe_decode(header_value, charset)
                else:
                    headers.append((field, header_value))
                header_field = b""
                header_value = b""
            elif message_type == MultiPartMessage.HEADERS_FINISHED:  # type: ignore
                # Raise instead of ``assert``: this validates client input
                # and assertions are stripped under ``python -O``.
                if not field_name:
                    raise multipart.MultipartParseError(
                        "`Content-Disposition` is not available in headers"
                    )
            elif message_type == MultiPartMessage.PART_DATA:  # type: ignore
                data += message_bytes
            elif message_type == MultiPartMessage.PART_END:  # type: ignore
                items.append((field_name, headers, data))

    parser.finalize()
    return items
async def multi_part(self) -> None:
    """Parse the multipart body and fill ``request.post`` / ``request.files``.

    Parts whose Content-Disposition has a ``filename`` option are stored as
    ``UploadFile`` objects in ``self.request.files``; all other parts are
    decoded and stored in ``self.request.post``.

    Raises:
        KeyError: If a part's Content-Disposition has no ``name`` option.
    """
    header_field = b""
    header_value = b""
    content_disposition = None
    field_name = ""
    _data = b""
    file = None

    boundary = self.params.get(b"boundary")
    charset = self.params.get(b"charset", "utf-8")
    # The default is str, but self.params is indexed with bytes keys and a
    # charset taken from it is bytes. Normalize to str so the decoder always
    # receives a codec name.
    if isinstance(charset, bytes):
        charset = charset.decode("latin-1")

    # Every parser callback just queues a (kind, payload) event; the events
    # are processed in the loop below, where awaiting is possible.
    callbacks = {
        "on_part_begin": lambda: self.messages.append(
            (FormMultiPartData.part_begin, b"")),
        "on_part_data": lambda i, j, k: self.messages.append(
            (FormMultiPartData.part_data, i[j:k])),
        "on_part_end": lambda: self.messages.append(
            (FormMultiPartData.part_end, b"")),
        "on_header_field": lambda i, j, k: self.messages.append(
            (FormMultiPartData.header_field, i[j:k])),
        "on_header_value": lambda i, j, k: self.messages.append(
            (FormMultiPartData.header_value, i[j:k])),
        "on_header_end": lambda: self.messages.append(
            (FormMultiPartData.header_end, b"")),
        "on_headers_finished": lambda: self.messages.append(
            (FormMultiPartData.headers_finished, b"")),
        "on_end": lambda: self.messages.append((FormMultiPartData.end, b"")),
    }
    parser = multipart.MultipartParser(boundary, callbacks)

    async for chunk in self.body:
        parser.write(chunk)
        messages = list(self.messages)
        self.messages.clear()
        for message_type, message_bytes in messages:
            if message_type == FormMultiPartData.part_begin:
                content_disposition = None
                # NOTE(review): this overwrites the instance attribute with
                # per-part state; kept as-is because other code may read it.
                self.content_type = b""
                _data = b""
            elif message_type == FormMultiPartData.header_field:
                header_field += message_bytes
            elif message_type == FormMultiPartData.header_value:
                header_value += message_bytes
            elif message_type == FormMultiPartData.header_end:
                field = header_field.lower()
                if field == b"content-disposition":
                    content_disposition = header_value
                elif field == b"content-type":
                    self.content_type = header_value
                header_field = b""
                header_value = b""
            elif message_type == FormMultiPartData.headers_finished:
                _, options = parse_options_header(content_disposition)
                field_name = self._decode(options[b"name"], charset)
                if b"filename" in options:
                    filename = self._decode(options[b"filename"], charset)
                    file = UploadFile(
                        filename=filename,
                        content_type=self.content_type.decode("latin-1"),
                    )
                else:
                    file = None
            elif message_type == FormMultiPartData.part_data:
                if file is None:
                    _data += message_bytes
                else:
                    await file.write(message_bytes)
            elif message_type == FormMultiPartData.part_end:
                if file is None:
                    self.request.post[field_name] = self._decode(_data, charset)
                else:
                    # Rewind so consumers read the upload from its start.
                    await file.seek(0)
                    self.request.files[field_name] = file
            elif message_type == FormMultiPartData.end:
                pass
async def parse(self) -> FormData:
    """Parse the multipart body from ``self.stream`` into ``FormData``.

    Parts with a ``filename`` option in their Content-Disposition become
    ``UploadFile`` objects; every other part is decoded as latin-1 text.
    """
    # The multipart boundary is an option of the request's Content-Type.
    _, header_params = parse_options_header(self.headers["Content-Type"])

    # Create the parser, wiring each event to the matching method on self.
    parser = multipart.MultipartParser(
        header_params.get(b"boundary"),
        {
            "on_part_begin": self.on_part_begin,
            "on_part_data": self.on_part_data,
            "on_part_end": self.on_part_end,
            "on_header_field": self.on_header_field,
            "on_header_value": self.on_header_value,
            "on_header_end": self.on_header_end,
            "on_headers_finished": self.on_headers_finished,
            "on_end": self.on_end,
        },
    )

    name_buf = b""
    value_buf = b""
    part_headers_raw = []  # type: typing.List[typing.Tuple[bytes, bytes]]
    field_name = ""
    text_buf = b""
    upload = None  # type: typing.Optional[UploadFile]
    parsed_items = (
        []
    )  # type: typing.List[typing.Tuple[str, typing.Union[str, UploadFile]]]

    # Feed the parser with data from the request.
    async for chunk in self.stream:
        parser.write(chunk)
        pending = list(self.messages)
        self.messages.clear()
        for kind, payload in pending:
            if kind == MultiPartMessage.PART_BEGIN:
                part_headers_raw = []
                text_buf = b""
            elif kind == MultiPartMessage.HEADER_FIELD:
                name_buf += payload
            elif kind == MultiPartMessage.HEADER_VALUE:
                value_buf += payload
            elif kind == MultiPartMessage.HEADER_END:
                part_headers_raw.append((name_buf.lower(), value_buf))
                name_buf = b""
                value_buf = b""
            elif kind == MultiPartMessage.HEADERS_FINISHED:
                part_headers = Headers(raw=part_headers_raw)
                _, options = parse_options_header(
                    part_headers.get("Content-Disposition")
                )
                field_name = options[b"name"].decode("latin-1")
                if b"filename" in options:
                    upload = UploadFile(
                        filename=options[b"filename"].decode("latin-1"),
                        content_type=part_headers.get("Content-Type", ""),
                    )
                else:
                    upload = None
            elif kind == MultiPartMessage.PART_DATA:
                if upload is None:
                    text_buf += payload
                else:
                    await upload.write(payload)
            elif kind == MultiPartMessage.PART_END:
                if upload is None:
                    parsed_items.append((field_name, text_buf.decode("latin-1")))
                else:
                    await upload.seek(0)
                    parsed_items.append((field_name, upload))
            # MultiPartMessage.END needs no handling.

    parser.finalize()
    return FormData(parsed_items)
def opentc_REQMOD(self):
    """Handle a REQMOD request: analyse the body and forward or replace it.

    Without a body, the encapsulated request is forwarded untouched. In
    preview mode the body is relayed without analysis. Otherwise the whole
    body is buffered and analysed (per part when a multipart boundary is
    present, else as a single blob); the request is then either forwarded
    unchanged or replaced by a form-encoded request carrying the analysis
    results, sent to the configured replacement URL.
    """
    self.multipart_data = None
    self.last_form_field = None
    self.big_chunk = b''
    self.content_analysis_results = dict()

    # Best-effort connectivity check; a failure is logged and ignored.
    try:
        response = self.server.opentc["client"].command("PING\n")
        response = json.loads(response.decode('utf-8'))
        self.logger.debug("REQMOD Ping response: {}".format(response))
    except Exception:
        self.logger.error(traceback.format_exc())

    def on_part_begin():
        self.multipart_data = dict()
        self.multipart_data[b'Content'] = b''
        self.logger.debug("on_part_begin")

    def on_part_data(data, start, end):
        self.multipart_data[b'Content'] += data[start:end]
        self.logger.debug("on_part_data")

    def on_part_end():
        self.logger.debug("on_part_end")
        for key in self.multipart_data:
            if key == b'Content':
                mime_type = magic.from_buffer(
                    self.multipart_data[b'Content'], mime=True)
                self.logger.debug("Content mime_type: {}".format(mime_type))
                # NOTE(review): the analysis only runs for parts that
                # carried a Content-Type header — confirm this is intended.
                if b'Content-Type' in self.multipart_data:
                    # The sniffed MIME type is used rather than the one
                    # declared in the part's Content-Type header.
                    content_type = [mime_type]
                    content_disposition = {'name': '', 'filename': ''}
                    for x in self.multipart_data[
                            b'Content-Disposition'].split(b';'):
                        if b'=' in x:
                            # maxsplit=1: a value may itself contain '='.
                            opt_name, opt_value = x.split(b'=', 1)
                            opt_name = opt_name.decode("utf-8").strip(" \"")
                            opt_value = opt_value.decode("utf-8").strip(" \"")
                            content_disposition[opt_name] = opt_value
                    self.logger.debug(
                        "Content-Disposition: {}".format(content_disposition))
                    result = self.content_analyse(
                        converter=self.server.opentc["config"]["converter"],
                        content_disposition=content_disposition,
                        content_type=content_type,
                        content=self.multipart_data[b'Content'],
                        content_min_length=self.server.opentc["config"]
                        ["content_min_length"],
                        client=self.server.opentc["client"])
                    # Assumes the second ';'-separated segment of the header
                    # is the ``name=...`` option.
                    name = self.multipart_data[
                        b'Content-Disposition'].split(b';')[1].split(b'=')[1]
                    self.content_analysis_results[name.decode(
                        "utf-8").replace('"', '')] = result
            else:
                self.logger.debug("{}: {}".format(
                    key, self.multipart_data[key]))
        return

    def on_header_field(data, start, end):
        self.last_form_field = data[start:end]
        self.logger.debug("on_header_field")

    def on_header_value(data, start, end):
        self.multipart_data[self.last_form_field] = data[start:end]
        self.logger.debug("on_header_value")

    def on_end():
        self.logger.debug("on_end")

    self.set_icap_response(200)

    # self.set_enc_request(b' '.join(self.enc_req))
    for h in self.enc_req_headers:
        for v in self.enc_req_headers[h]:
            self.set_enc_header(h, v)

    # Copy the request body (in case of a POST for example)
    if not self.has_body:
        self.set_enc_request(b' '.join(self.enc_req))
        self.send_headers(False)
        return

    if self.preview:
        prevbuf = b''
        while True:
            chunk = self.read_chunk()
            if chunk == b'':
                break
            prevbuf += chunk
        if self.ieof:
            # The preview already contained the entire body.
            self.send_headers(True)
            if len(prevbuf) > 0:
                self.write_chunk(prevbuf)
            self.write_chunk(b'')
            return
        self.cont()
        self.set_enc_request(b' '.join(self.enc_req))
        self.send_headers(True)
        if len(prevbuf) > 0:
            self.write_chunk(prevbuf)
        while True:
            chunk = self.read_chunk()
            self.write_chunk(chunk)
            if chunk == b'':
                break
    else:
        # Parse the Content-Type header to get the multipart boundary.
        content_type, params = parse_options_header(
            self.enc_req_headers[b'content-type'][0])
        boundary = params.get(b'boundary')
        parser = None
        if boundary is not None:
            # Callbacks dictionary.
            callbacks = {
                'on_part_begin': on_part_begin,
                'on_part_data': on_part_data,
                'on_part_end': on_part_end,
                'on_header_field': on_header_field,
                'on_header_value': on_header_value,
                'on_end': on_end
            }
            parser = multipart.MultipartParser(boundary, callbacks)

        while True:
            chunk = self.read_chunk()
            if chunk == b'':
                break
            self.big_chunk += chunk

        if boundary is not None:
            # Feed the parser in 1 MiB slices. The offset advances by the
            # slice size so that bodies larger than one slice are parsed in
            # full.
            slice_size = 1024 * 1024
            for offset in range(0, len(self.big_chunk), slice_size):
                parser.write(self.big_chunk[offset:offset + slice_size])
        else:
            result = self.content_analyse(
                converter=self.server.opentc["config"]["converter"],
                content_type=content_type,
                content=self.big_chunk,
                content_min_length=self.server.opentc["config"]
                ["content_min_length"],
                client=self.server.opentc["client"])
            name = "text"
            self.content_analysis_results[name] = result

        # Decide whether the request may pass. The loop structure is kept
        # exactly as written, since the precedence between classifiers is
        # part of the behavior.
        is_allowed = True
        for result in self.content_analysis_results:
            if self.content_analysis_results[result] is None:
                continue
            for classifier in self.server.opentc["config"][
                    "classifier_status"]:
                if self.server.opentc["config"]["classifier_status"][
                        classifier] is False:
                    continue
                for restricted_class in self.server.opentc["config"][
                        "restricted_classes"]:
                    self.logger.debug(
                        "{}: result:{}, classifier:{}".format(
                            restricted_class, result, classifier))
                    if restricted_class in self.content_analysis_results[
                            result][classifier]:
                        is_allowed = False
                        break
                    else:
                        is_allowed = True
                if is_allowed is True:
                    break
            if is_allowed is False:
                break

        if is_allowed:
            self.set_enc_request(b' '.join(self.enc_req))
            self.send_headers(True)
            self.write_chunk(self.big_chunk)
        else:
            # Replace the request with a form-encoded one that carries the
            # analysis results to the configured replacement URL.
            content = json.dumps(self.content_analysis_results)
            content = "result={}".format(content).encode("utf-8")
            enc_req = self.enc_req[:]
            enc_req[0] = self.server.opentc["config"][
                "replacement_http_method"].encode("utf-8")
            enc_req[1] = self.server.opentc["config"][
                "replacement_url"].encode("utf-8")
            self.set_enc_request(b' '.join(enc_req))
            self.enc_headers[b"content-type"] = [
                b"application/x-www-form-urlencoded"
            ]
            self.enc_headers[b"content-length"] = [
                str(len(content)).encode("utf-8")
            ]
            self.send_headers(True)
            self.write_chunk(content)
async def parse_multipart(  # noqa: C901, CCR001
    content_type_header: bytes, chunks: typing.AsyncIterator[bytes]
) -> typing.Tuple[FormParams, MultipartFiles]:
    """Parse a multipart body into text fields and uploaded files.

    Args:
        content_type_header: The request's Content-Type header; its
            ``boundary`` option delimits the parts.
        chunks: Asynchronous iterator over the raw body.

    Returns:
        ``(FormParams, MultipartFiles)``: the text fields decoded as UTF-8,
        and one ``MultipartFile`` for each part that has a ``filename``.

    Raises:
        KeyError: If a part's Content-Disposition has no ``name`` option.
    """
    assert (
        parse_options_header is not None
    ), "The 'python-multipart' package must be installed."
    # Only the options are needed here. The request's own media type must not
    # be used as the default content type of the individual parts.
    _, params = parse_options_header(content_type_header)

    header_field = b""
    header_value = b""
    content_disposition = b""
    content_type = b""
    field_name = ""
    field_value = b""
    mfw: typing.Optional[MultipartFileWriter] = None

    io_pending: Operations = []
    form: typing.List[typing.Tuple[str, str]] = []
    files: typing.List[typing.Tuple[str, MultipartFile]] = []

    def callback(  # noqa: CCR001
        name: str,
        data: typing.Optional[bytes] = None,
        start: typing.Optional[int] = None,
        end: typing.Optional[int] = None,
    ) -> None:
        """Handle one parser event; unknown event names are ignored."""
        nonlocal header_field, header_value, content_disposition, content_type
        nonlocal field_name, field_value, mfw

        if name == "header_field":
            header_field += data[start:end]  # type: ignore[index]
        elif name == "header_value":
            header_value += data[start:end]  # type: ignore[index]
        elif name == "header_end":
            lowered = header_field.lower()
            if lowered == b"content-disposition":
                content_disposition = header_value
            elif lowered == b"content-type":
                content_type = header_value
            header_field = b""
            header_value = b""
        elif name == "headers_finished":
            _, options = parse_options_header(content_disposition)
            field_name = options[b"name"].decode("utf-8")
            field_value = b""
            if b"filename" in options:
                mfw = MultipartFileWriter(
                    options[b"filename"].decode("utf-8"),
                    content_type.decode("utf-8"),
                )
            else:
                mfw = None
            # This part's headers are consumed: clear them so a later part
            # that omits a header does not inherit the previous part's value.
            content_disposition = b""
            content_type = b""
        elif name == "part_data":
            chunk = data[start:end]  # type: ignore[index]
            if mfw:
                if mfw.would_roll(len(chunk)):
                    # Deferred: handed to the executor by the loop below.
                    io_pending.append((mfw, chunk))
                else:
                    mfw.write(chunk)
            else:
                field_value += chunk
        elif name == "part_end":
            if mfw:
                rolled = mfw.would_roll(0)
                if rolled:
                    io_pending.append((mfw, None))
                else:
                    mfw.seek()
                files.append((
                    field_name,
                    MultipartFile(mfw.name, mfw.content_type, rolled, mfw.file),
                ))
            else:
                form.append((field_name, field_value.decode("utf-8")))

    parser = MultipartParser(params.get(b"boundary"))
    # Route every parser event through the single dispatcher above.
    parser.callback = callback

    loop = get_running_loop()
    async for chunk in chunks:
        parser.write(chunk)
        if io_pending:
            # Run the deferred file operations in the default executor.
            await loop.run_in_executor(None, flush_pending_io, io_pending)
            io_pending.clear()

    return FormParams(form), MultipartFiles(files)