def from_inference_job(  # pylint: disable=arguments-differ
    self, input_=None, input_file=None, **extra_args
) -> Iterator[InferenceTask[FileLike]]:
    '''
    Generate InferenceTask from calling bento_svc.run(input_=None, input_file=None)

    Parameters
    ----------
    input_ : Sequence[str]
        The input values

    input_file : Sequence[str]
        The URIs/paths of the input files

    extra_args : dict
        Additional parameters
    '''
    if input_file is not None:
        for d in input_file:
            uri = pathlib.Path(d).absolute().as_uri()
            yield InferenceTask(
                inference_job_args=extra_args, data=FileLike(uri=uri)
            )
    else:
        for d in input_:
            yield InferenceTask(
                inference_job_args=extra_args,
                data=FileLike(bytes_=d.encode()),
            )
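# Hedged usage sketch for the FileLike variant above; `adapter` stands in for
# an instance of the owning input adapter and is an assumption, not from the
# source. Paths passed via `input_file` become URI-backed FileLike tasks,
# while raw strings passed via `input_` are wrapped as encoded bytes:
#
#     tasks = list(adapter.from_inference_job(input_file=["./a.bin", "./b.bin"]))
#     tasks = list(adapter.from_inference_job(input_=["raw payload"]))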
def from_http_request(self, req: HTTPRequest) -> InferenceTask[str]:
    if req.headers.content_type == 'multipart/form-data':
        _, _, files = HTTPRequest.parse_form_data(req)
        if len(files) != 1:
            return InferenceTask().discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} accepts one text file "
                "at a time",
            )
        input_file = next(iter(files.values()))
        bytes_ = input_file.read()
        charset = chardet.detect(bytes_)['encoding'] or "utf-8"
    else:
        bytes_ = req.body
        charset = req.headers.charset or "utf-8"
    try:
        return InferenceTask(
            http_headers=req.headers,
            data=bytes_.decode(charset),
        )
    except UnicodeDecodeError:
        return InferenceTask().discard(
            http_status=400,
            err_msg=f"{self.__class__.__name__}: UnicodeDecodeError for {req.body}",
        )
    except LookupError:
        return InferenceTask().discard(
            http_status=400,
            err_msg=f"{self.__class__.__name__}: Unsupported charset {charset}",
        )
def from_cli(self, cli_args: Tuple[str]) -> Iterator[InferenceTask[str]]:
    for input_ in parse_cli_input(cli_args):
        try:
            yield InferenceTask(
                cli_args=cli_args,
                data=input_.read().decode(),  # default charset (utf-8) for CLI input
            )
        except UnicodeDecodeError:
            yield InferenceTask().discard(
                http_status=400,
                err_msg=f"{self.__class__.__name__}: UnicodeDecodeError",
            )
def from_inference_job(  # pylint: disable=arguments-differ
    self, input_=None, input_file=None, **extra_args
) -> Iterator[InferenceTask[FileLike]]:
    if input_file is not None:
        for ds in zip(*input_file):
            uris = (pathlib.Path(d).absolute().as_uri() for d in ds)
            fs = tuple(FileLike(uri=uri) for uri in uris)
            yield InferenceTask(data=fs, inference_job_args=extra_args)
    else:
        for ds in zip(*input_):
            fs = tuple(FileLike(bytes_=d.encode()) for d in ds)
            yield InferenceTask(data=fs, inference_job_args=extra_args)
def from_aws_lambda_event(self, event: AwsLambdaEvent) -> InferenceTask[FileLike]:
    headers = HTTPHeaders.from_dict(event.get('headers', {}))
    if headers.content_type == "text/csv":
        f = FileLike(bytes_=event["body"].encode(), name="input.csv")
    else:
        # Optimistically assuming Content-Type to be "application/json"
        f = FileLike(bytes_=event["body"].encode(), name="input.json")
    return InferenceTask(aws_lambda_event=event, data=f)
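# Hedged usage sketch (not from the source): a minimal API Gateway proxy-style
# event in the shape from_aws_lambda_event expects, i.e. a "headers" dict and
# a string "body". Any Content-Type other than "text/csv" falls through to the
# JSON branch. The `adapter` name is an assumption.
example_csv_event = {
    "headers": {"Content-Type": "text/csv"},
    "body": "col_a,col_b\n1,2\n",
}
# task = adapter.from_aws_lambda_event(example_csv_event)
# assert task.data.name == "input.csv"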
def test_multi_image_input_extract_args_wrong_extension(input_adapter, gen_img_io):
    task = InferenceTask(data=(gen_img_io("test.custom"), gen_img_io("test.jpg")))
    args = input_adapter.extract_user_func_args([task])
    for _ in zip(*args):
        pass
    assert task.is_discarded
def from_inference_job(  # pylint: disable=arguments-differ
    self,
    input_=None,
    input_file=None,
    **extra_args,
) -> Iterator[InferenceTask[str]]:
    '''
    Generate InferenceTask from calling bento_svc.run(input_=None, input_file=None)

    Parameters
    ----------
    input_ : Sequence[str]
        The input values

    input_file : Sequence[str]
        The URIs/paths of the input files

    extra_args : dict
        Additional parameters
    '''
    if input_file is not None:
        for d in input_file:
            uri = pathlib.Path(d).absolute().as_uri()
            bytes_ = FileLike(uri=uri).read()
            try:
                charset = chardet.detect(bytes_)['encoding'] or "utf-8"
                yield InferenceTask(
                    inference_job_args=extra_args,
                    data=bytes_.decode(charset),
                )
            except UnicodeDecodeError:
                yield InferenceTask().discard(
                    http_status=400,
                    err_msg=f"{self.__class__.__name__}: "
                    f"Tried to decode with {charset} but failed with a "
                    "UnicodeDecodeError.",
                )
            except LookupError:
                yield InferenceTask().discard(
                    http_status=400,
                    err_msg=f"{self.__class__.__name__}: "
                    f"Unsupported charset {charset}",
                )
    else:
        for d in input_:
            yield InferenceTask(inference_job_args=extra_args, data=d)
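# Hedged usage sketch for the str variant above (`svc` is an assumed
# BentoService instance, not from the source): in-memory strings are passed
# through unchanged, while file inputs are read, charset-detected with
# chardet, and decoded before each task is yielded:
#
#     svc.run(input_=["hello", "world"])
#     svc.run(input_file=["./a.txt", "./b.txt"])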
def from_cli(self, cli_args: Tuple[str]) -> Iterator[InferenceTask[BinaryIO]]:
    for input_ in parse_cli_input(cli_args):
        bio = io.BytesIO(input_.read())
        bio.name = input_.name
        yield InferenceTask(
            context=InferenceContext(cli_args=cli_args),
            data=bio,
        )
def from_http_request(self, req: HTTPRequest) -> InferenceTask[str]:
    try:
        return InferenceTask(
            http_headers=req.headers,
            data=req.body.decode(req.headers.charset or "utf-8"),
        )
    except UnicodeDecodeError:
        return InferenceTask().discard(
            http_status=400,
            err_msg=f"{self.__class__.__name__}: UnicodeDecodeError for {req.body}",
        )
    except LookupError:
        return InferenceTask().discard(
            http_status=400,
            err_msg=f"{self.__class__.__name__}: Unsupported charset "
            f"{req.headers.charset}",
        )
def test_anno_image_input_extract_args_custom_extension(
    input_adapter, img_file, json_file
):
    img_io = io.BytesIO(read_bin(img_file))
    img_io.name = "test.custom"
    json_io = io.BytesIO(read_bin(json_file))

    task = InferenceTask(data=(img_io, json_io))
    args = input_adapter.extract_user_func_args([task])
    for _ in zip(*args):
        pass
    assert task.is_discarded

    input_adapter = AnnotatedImageInput(accept_image_formats=["custom"])
    task = InferenceTask(data=(img_io, json_io))
    args = input_adapter.extract_user_func_args([task])
    for img, json_obj in zip(*args):
        assert img.shape == (10, 10, 3)
        assert json_obj['name'] == "kaith"
def test_multi_image_input_extract_args_missing_image(input_adapter, gen_img_io):
    task = InferenceTask(data=(None, gen_img_io()))
    args = input_adapter.extract_user_func_args([task])
    assert not args[0]
    for _ in zip(*args):
        pass
    assert task.is_discarded
def test_multi_image_input_extract_args_custom_extension(gen_img_io, verify_args):
    input_adapter = MultiImageInput(accept_image_formats=[".custom", ".jpg"])
    task = InferenceTask(data=(gen_img_io("test.custom"), gen_img_io("test.jpg")))
    args = input_adapter.extract_user_func_args([task])
    assert not task.context.err_msg
    assert not task.is_discarded
    verify_args(args)
def from_aws_lambda_event(self, event: AwsLambdaEvent) -> InferenceTask[BinaryIO]:
    parsed_headers = ParsedHeaders.parse(tuple(event.get('headers', {}).items()))
    if parsed_headers.content_type == "text/csv":
        bio = io.BytesIO(event["body"].encode())
        bio.name = "input.csv"
    else:
        # Optimistically assuming Content-Type to be "application/json"
        bio = io.BytesIO(event["body"].encode())
        bio.name = "input.json"
    return InferenceTask(aws_lambda_event=event, data=bio)
def test_anno_image_input_extract_args_missing_image(input_adapter, json_file):
    json_io = io.BytesIO(read_bin(json_file))
    task = InferenceTask(data=(None, json_io))
    args = input_adapter.extract_user_func_args([task])
    assert not args[0]
    for _ in zip(*args):
        pass
    assert task.is_discarded
def from_cli(self, cli_args: Tuple[str]) -> Iterator[InferenceTask[str]]:
    for input_ in parse_cli_input(cli_args):
        try:
            bytes_ = input_.read()
            charset = chardet.detect(bytes_)['encoding'] or "utf-8"
            yield InferenceTask(
                cli_args=cli_args,
                data=bytes_.decode(charset),
            )
        except UnicodeDecodeError:
            yield InferenceTask().discard(
                http_status=400,
                err_msg=f"{self.__class__.__name__}: "
                f"Tried to decode with {charset} but failed with a "
                "UnicodeDecodeError.",
            )
        except LookupError:
            yield InferenceTask().discard(
                http_status=400,
                err_msg=f"{self.__class__.__name__}: Unsupported charset {charset}",
            )
def _method(self, req: HTTPRequest) -> InferenceTask:
    if req.headers.content_encoding in {"gzip", "x-gzip"}:
        # https://tools.ietf.org/html/rfc7230#section-4.2.3
        try:
            req.body = gzip.decompress(req.body)
        except OSError:
            return InferenceTask().discard(
                http_status=400, err_msg="Gzip decompression error"
            )
        req.headers.pop("content-encoding")
        return method(self, req)
    return method(self, req)
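# Hedged reconstruction of the decorator that `_method` above presumably
# belongs to; the factory name `decompress_gzip_request` and the use of
# functools.wraps are assumptions, since only the inner wrapper body appears
# in the source.
import functools

def decompress_gzip_request(method):
    @functools.wraps(method)
    def _method(self, req):
        ...  # gzip handling exactly as in the wrapper body above
        return method(self, req)
    return _method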
def from_http_request(self, req: HTTPRequest) -> InferenceTask[bytes]:
    if req.parsed_headers.content_encoding in {"gzip", "x-gzip"}:
        # https://tools.ietf.org/html/rfc7230#section-4.2.3
        try:
            return InferenceTask(
                http_headers=req.parsed_headers,
                data=gzip.decompress(req.body),
            )
        except OSError:
            task = InferenceTask(data=None)
            task.discard(http_status=400, err_msg="Gzip decompression error")
            return task
    elif req.parsed_headers.content_encoding in ["", "identity"]:
        return InferenceTask(
            http_headers=req.parsed_headers,
            data=req.body,
        )
    else:
        task = InferenceTask(data=None)
        task.discard(http_status=415, err_msg="Unsupported Media Type")
        return task
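# Hedged round-trip check for the gzip branch above, using only the standard
# library: gzip.decompress inverts gzip.compress, and malformed input raises
# gzip.BadGzipFile, an OSError subclass caught by the handler above.
import gzip

payload = b'{"text": "hello"}'
assert gzip.decompress(gzip.compress(payload)) == payload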
def test_anno_image_input_extract_args(input_adapter, img_file, json_file):
    img_io = io.BytesIO(read_bin(img_file))
    img_io.name = "test.jpg"
    json_io = io.BytesIO(read_bin(json_file))
    task = InferenceTask(data=(img_io, json_io))
    args = input_adapter.extract_user_func_args([task])
    assert args[0]
    assert args[1]
    for img, json_obj in zip(*args):
        assert img.shape == (10, 10, 3)
        assert json_obj['name'] == "kaith"
def test_file_input_extract(input_adapter, bin_file):
    bin_bytes = read_bin(bin_file)
    bin_ios = [
        tuple(io.BytesIO(bin_bytes) for _ in range(2)) for _ in range(5)
    ]
    tasks = [InferenceTask(data=bin_io_pair) for bin_io_pair in bin_ios]
    args = input_adapter.extract_user_func_args(tasks)
    assert args[0]
    assert args[1]
    for file1, file2 in zip(*args):
        assert b'\x810\x899' == file1.read()
        assert b'\x810\x899' == file2.read()
def from_aws_lambda_event(self, event):
    if event["headers"].get("Content-Type", "").startswith("image/"):
        img_bytes = base64.b64decode(event["body"])
        _, ext = event["headers"]["Content-Type"].split('/')
        f = FileLike(bytes_=img_bytes, name=f"default.{ext}")
        task = InferenceTask(data=(f,))
    else:
        task = InferenceTask(data=None)
        task.discard(
            http_status=400,
            err_msg="BentoML currently doesn't support Content-Type: "
            "{content_type} for AWS Lambda".format(
                content_type=event["headers"].get("Content-Type", "")
            ),
        )
    return task
def from_aws_lambda_event(
    self, event: AwsLambdaEvent
) -> InferenceTask[BinaryIO]:
    return InferenceTask(
        context=InferenceContext(aws_lambda_event=event),
        data=io.BytesIO(base64.decodebytes(event.get('body', "").encode())),
    )
def test_multi_image_input_extract_args(input_adapter, gen_img_io, verify_args):
    task = InferenceTask(data=(gen_img_io(), gen_img_io()))
    args = input_adapter.extract_user_func_args([task])
    verify_args(args)
def from_http_request(self, req: HTTPRequest) -> MultiImgTask:
    if len(self.input_names) == 1:
        # lenient parsing when there is only a single input
        if req.parsed_headers.content_type == 'multipart/form-data':
            _, _, files = HTTPRequest.parse_form_data(req)
            if not any(files):
                task = InferenceTask(data=None)
                task.discard(
                    http_status=400,
                    err_msg=f"BentoML#{self.__class__.__name__} requires input "
                    f"fields {self.input_names}",
                )
            else:
                f = next(iter(files.values()))
                task = InferenceTask(
                    http_headers=req.parsed_headers,
                    data=(f,),
                )
        else:  # for image/* requests
            task = InferenceTask(
                http_headers=req.parsed_headers,
                data=(io.BytesIO(req.body),),
            )
    elif req.parsed_headers.content_type == 'multipart/form-data':
        _, _, files = HTTPRequest.parse_form_data(req)
        files = tuple(files.get(k) for k in self.input_names)
        if not any(files):
            task = InferenceTask(data=None)
            task.discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} requires input "
                f"fields {self.input_names}",
            )
        elif not all(files) and not self.allow_none:
            task = InferenceTask(data=None)
            task.discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} requires input "
                f"fields {self.input_names}",
            )
        else:
            task = InferenceTask(
                http_headers=req.parsed_headers,
                data=files,
            )
    else:
        task = InferenceTask(data=None)
        task.discard(
            http_status=400,
            err_msg=f"BentoML#{self.__class__.__name__} with multiple inputs "
            "accepts requests with Content-Type: multipart/form-data only",
        )
    return task
def from_aws_lambda_event(self, event: AwsLambdaEvent) -> InferenceTask[str]:
    return InferenceTask(
        aws_lambda_event=event,
        data=event.get('body', ""),
    )
def from_http_request(self, req: HTTPRequest) -> MultiFileTask:
    if req.headers.content_type != 'multipart/form-data':
        task = InferenceTask(data=None)
        task.discard(
            http_status=400,
            err_msg=f"BentoML#{self.__class__.__name__} only accepts requests "
            "with Content-Type: multipart/form-data",
        )
    else:
        _, _, files = HTTPRequest.parse_form_data(req)
        files = tuple(files.get(k) for k in self.input_names)
        if not any(files):
            task = InferenceTask(data=None)
            task.discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} requires input "
                f"fields {self.input_names}",
            )
        elif not all(files) and not self.allow_none:
            task = InferenceTask(data=None)
            task.discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} requires input "
                f"fields {self.input_names}",
            )
        else:
            task = InferenceTask(
                http_headers=req.headers,
                data=files,
            )
    return task
def predict_strict_json(self, input_data, task: InferenceTask = None):
    if task.http_headers.content_type != "application/json":
        task.discard(http_status=400, err_msg="application/json only")
        return
    result = self.artifacts.model.predict_json([input_data])[0]
    return result
def from_http_request(self, req: HTTPRequest) -> InferenceTask[BinaryIO]:
    if req.parsed_headers.content_type == 'multipart/form-data':
        _, _, files = HTTPRequest.parse_form_data(req)
        if len(files) != 1:
            task = InferenceTask(data=None)
            task.discard(
                http_status=400,
                err_msg=f"BentoML#{self.__class__.__name__} requires one and"
                " only one file at a time; if you just upgraded from"
                " bentoml 0.7, you may need to use MultiFileAdapter instead",
            )
        else:
            input_file = next(iter(files.values()))
            task = InferenceTask(
                context=InferenceContext(http_headers=req.parsed_headers),
                data=input_file,
            )
    elif req.body:
        task = InferenceTask(
            context=InferenceContext(http_headers=req.parsed_headers),
            data=io.BytesIO(req.body),
        )
    else:
        task = InferenceTask(data=None)
        task.discard(
            http_status=400,
            err_msg=f'BentoML#{self.__class__.__name__}: unexpected HTTP request'
            ' format',
        )
    return task
def from_aws_lambda_event(self, event: AwsLambdaEvent) -> InferenceTask[bytes]:
    return InferenceTask(
        aws_lambda_event=event,
        data=event.get('body', "").encode(JSON_CHARSET),
    )
def from_cli(self, cli_args: Sequence[str]) -> Iterator[MultiFileTask]:
    for inputs in parse_cli_inputs(cli_args, self.input_names):
        yield InferenceTask(cli_args=cli_args, data=inputs)
def from_cli(self, cli_args: Tuple[str]) -> Iterator[InferenceTask[bytes]]:
    for json_input in parse_cli_input(cli_args):
        yield InferenceTask(cli_args=cli_args, data=json_input.read())