async def invocations(self, request: Request) -> Response:
    """
    This custom handler is meant to mimic the behaviour of the existing scoring server
    in MLflow.
    For details about its implementation, please consult the original implementation in
    the MLflow repository:

        https://github.com/mlflow/mlflow/blob/master/mlflow/pyfunc/scoring_server/__init__.py
    """
    content_type = request.headers.get("content-type", None)
    raw_data = await request.body()
    as_str = raw_data.decode("utf-8")

    if content_type == CONTENT_TYPE_CSV:
        csv_input = StringIO(as_str)
        data = parse_csv_input(csv_input=csv_input)
    elif content_type == CONTENT_TYPE_JSON:
        data = infer_and_parse_json_input(as_str, self._input_schema)
    elif content_type == CONTENT_TYPE_JSON_SPLIT_ORIENTED:
        data = parse_json_input(
            json_input=StringIO(as_str),
            orient="split",
            schema=self._input_schema,
        )
    elif content_type == CONTENT_TYPE_JSON_RECORDS_ORIENTED:
        data = parse_json_input(
            json_input=StringIO(as_str),
            orient="records",
            schema=self._input_schema,
        )
    elif content_type == CONTENT_TYPE_JSON_SPLIT_NUMPY:
        data = parse_split_oriented_json_input_to_numpy(as_str)
    else:
        content_type_error_message = (
            "This predictor only supports the following content types, "
            f"{CONTENT_TYPES}. Got '{content_type}'."
        )
        raise InferenceError(content_type_error_message)

    try:
        raw_predictions = self._model.predict(data)
    except MlflowException as e:
        raise InferenceError(e.message)
    except Exception:
        error_message = (
            "Encountered an unexpected error while evaluating the model. Verify"
            " that the serialized input Dataframe is compatible with the model for"
            " inference."
        )
        raise InferenceError(error_message)

    result = StringIO()
    predictions_to_json(raw_predictions, result)
    return Response(content=result.getvalue(), media_type="application/json")
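# A minimal client-side sketch for exercising a handler like the one above. The
# host, port, and endpoint path are assumptions (MLflow-style scoring servers
# conventionally expose POST /invocations); the content-type header mirrors
# MLflow's split-oriented JSON content type. This is an illustration, not part
# of the server code.
import json

import pandas as pd
import requests

df = pd.DataFrame({"zip": ["95120", "95128"], "cost": [10.45, 23.0]})

resp = requests.post(
    "http://localhost:8080/invocations",  # assumed host/port
    data=json.dumps(df.to_dict(orient="split")),
    headers={"Content-Type": "application/json; format=pandas-split"},
)
print(resp.status_code, resp.json())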
def test_parse_with_schema(pandas_df_with_all_types):
    schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns])
    df = _shuffle_pdf(pandas_df_with_all_types)
    json_str = json.dumps(df.to_dict(orient="split"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str, orient="split", schema=schema)
    json_str = json.dumps(df.to_dict(orient="records"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str, orient="records", schema=schema)
    assert schema == infer_signature(df[schema.input_names()]).inputs

    # The current behavior of the pandas json parse with type hints is weird. In some
    # cases, the types are forced, ignoring overflow and loss of precision:
    bad_df = """{
      "columns":["bad_integer", "bad_float", "bad_string", "bad_boolean"],
      "data":[
        [9007199254740991.0, 1.1, 1, 1.5],
        [9007199254740992.0, 9007199254740992.0, 2, 0],
        [9007199254740994.0, 3.3, 3, "some arbitrary string"]
      ]
    }"""
    schema = Schema([
        ColSpec("integer", "bad_integer"),
        ColSpec("float", "bad_float"),
        ColSpec("float", "good_float"),
        ColSpec("string", "bad_string"),
        ColSpec("boolean", "bad_boolean"),
    ])
    df = pyfunc_scoring_server.parse_json_input(bad_df, orient="split", schema=schema)

    # Unfortunately, the current behavior of the pandas parse is to force numbers to
    # int32 even if they don't fit:
    assert df["bad_integer"].dtype == np.int32
    assert all(df["bad_integer"] == [-2147483648, -2147483648, -2147483648])

    # The same goes for floats:
    assert df["bad_float"].dtype == np.float32
    assert all(df["bad_float"] == np.array([1.1, 9007199254740992, 3.3], dtype=np.float32))

    # However bad string is recognized as int64:
    assert all(df["bad_string"] == np.array([1, 2, 3], dtype=object))

    # Boolean is forced - zero and empty string are false, everything else is true:
    assert df["bad_boolean"].dtype == bool
    assert all(df["bad_boolean"] == [True, False, True])
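# A standalone sketch of the pandas behavior the test above pins down: when
# read_json is given explicit dtype hints, pandas casts the parsed columns to
# those dtypes even when the values do not fit. All names here are local to
# the sketch, and the exact overflowed int32 values are platform-dependent.
from io import StringIO

import numpy as np
import pandas as pd

jstr = '{"columns":["n"],"data":[[9007199254740991.0],[9007199254740994.0]]}'
forced = pd.read_json(StringIO(jstr), orient="split", dtype={"n": np.int32})
print(forced["n"].dtype)     # int32: the hint wins; range and precision are not checked
print(forced["n"].tolist())  # overflowed values, not the original numbers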
def predict(self, deployment_name, df):
    """
    Predict on the specified deployment using the provided dataframe.

    Compute predictions on the pandas DataFrame ``df`` using the specified deployment.
    Note that the input/output types of this method match those of `mlflow pyfunc
    predict` (we accept a pandas DataFrame as input and return either a pandas
    DataFrame, pandas Series, or numpy array as output).

    :param deployment_name: Name of deployment to predict against
    :param df: Pandas DataFrame to use for inference
    :return: A pandas DataFrame, pandas Series, or numpy array
    """
    try:
        service = Webservice(self.workspace, deployment_name)
    except Exception as e:
        raise MlflowException('Failure retrieving deployment to predict against') from e

    # Take in DF, parse to json using split orient
    input_data = _get_jsonable_obj(df, pandas_orient='split')

    if not service.scoring_uri:
        raise MlflowException(
            'Error attempting to call webservice, scoring_uri unavailable. '
            'This could be due to a failed deployment, or the service is not ready yet.\n'
            'Current State: {}\n'
            'Errors: {}'.format(service.state, service.error))

    # Pass split orient json to webservice
    # Take records orient json from webservice
    resp = ClientBase._execute_func(service._webservice_session.post, service.scoring_uri,
                                    data=json.dumps({'input_data': input_data}))

    if resp.status_code == 401:
        if service.auth_enabled:
            service_keys = service.get_keys()
            service._session.headers.update(
                {'Authorization': 'Bearer ' + service_keys[0]})
        elif service.token_auth_enabled:
            service_token, refresh_token_time = service.get_access_token()
            service._refresh_token_time = refresh_token_time
            service._session.headers.update(
                {'Authorization': 'Bearer ' + service_token})

        # Retry with refreshed credentials, re-sending the same wrapped payload
        resp = ClientBase._execute_func(service._webservice_session.post, service.scoring_uri,
                                        data=json.dumps({'input_data': input_data}))

    if resp.status_code == 200:
        # Parse records orient json to df
        return parse_json_input(json.dumps(resp.json()), orient='records')
    else:
        raise MlflowException('Failure during prediction:\n'
                              'Response Code: {}\n'
                              'Headers: {}\n'
                              'Content: {}'.format(resp.status_code, resp.headers,
                                                   resp.content))
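# A minimal usage sketch for the client method above, assuming it lives on the
# azureml deployment plugin client (requires the azureml-mlflow plugin to be
# installed) and that a deployment named "my-deployment" (hypothetical) already
# exists in the workspace.
import pandas as pd
from mlflow.deployments import get_deploy_client

client = get_deploy_client("azureml")  # resolves to the client defining predict()
df = pd.DataFrame({"zip": ["95120", "95128"], "cost": [10.45, 23.0]})
preds = client.predict("my-deployment", df)  # DataFrame/Series/ndarray, per the docstring
print(preds)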
def test_split_oriented_json_to_df():
    # test that datatype for "zip" column is not converted to "int64"
    jstr = '{"columns":["zip","cost","count"],"index":[0,1,2],' \
           '"data":[["95120",10.45,-8],["95128",23.0,-1],["95128",12.1,1000]]}'
    df = pyfunc_scoring_server.parse_json_input(jstr, orient="split")

    assert set(df.columns) == {'zip', 'cost', 'count'}
    assert set(str(dt) for dt in df.dtypes) == {'object', 'float64', 'int64'}
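# A plain-pandas sketch of why the test above matters: by default, read_json
# coerces numeric-looking strings such as "95120" to int64, so a scoring server
# has to opt out of dtype inference (e.g. by passing dtype=False) to keep zip
# codes as strings. This illustrates the pandas behavior only; it is not the
# parse_json_input implementation itself.
from io import StringIO

import pandas as pd

jstr = '{"columns":["zip"],"index":[0,1],"data":[["95120"],["95128"]]}'
print(pd.read_json(StringIO(jstr), orient="split")["zip"].dtype)               # int64
print(pd.read_json(StringIO(jstr), orient="split", dtype=False)["zip"].dtype)  # object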
def test_parse_json_input_split_oriented():
    size = 200
    data = {"col_m": [random_int(0, 1000) for _ in range(size)],
            "col_z": [random_str(4) for _ in range(size)],
            "col_a": [random_int() for _ in range(size)]}
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="split"), orient="split")
    # Iterating a DataFrame yields column labels, so `all(p1 == p2)` would be
    # vacuously true; compare the boolean frame element-wise instead.
    assert (p1 == p2).all().all()
def test_parse_json_input_records_oriented():
    size = 20
    data = {"col_m": [random_int(0, 1000) for _ in range(size)],
            "col_z": [random_str(4) for _ in range(size)],
            "col_a": [random_int() for _ in range(size)]}
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="records"),
                                                orient="records")

    # "records" orient may shuffle column ordering. Hence comparing each column Series.
    for col in data.keys():
        assert all(p1[col] == p2[col])
def test_records_oriented_json_to_df():
    # test that datatype for "zip" column is not converted to "int64"
    jstr = ("["
            '{"zip":"95120","cost":10.45,"score":8},'
            '{"zip":"95128","cost":23.0,"score":0},'
            '{"zip":"95128","cost":12.1,"score":10}'
            "]")
    df = pyfunc_scoring_server.parse_json_input(jstr, orient="records")

    assert set(df.columns) == {"zip", "cost", "score"}
    assert set(str(dt) for dt in df.dtypes) == {"object", "float64", "int64"}