Example #1
    async def invocations(self, request: Request) -> Response:
        """
        This custom handler is meant to mimic the behaviour of the existing
        scoring server in MLflow.
        For details about its implementation, please consult the original
        implementation in the MLflow repository:

            https://github.com/mlflow/mlflow/blob/master/mlflow/pyfunc/scoring_server/__init__.py
        """
        content_type = request.headers.get("content-type", None)
        raw_data = await request.body()
        as_str = raw_data.decode("utf-8")

        if content_type == CONTENT_TYPE_CSV:
            csv_input = StringIO(as_str)
            data = parse_csv_input(csv_input=csv_input)
        elif content_type == CONTENT_TYPE_JSON:
            data = infer_and_parse_json_input(as_str, self._input_schema)
        elif content_type == CONTENT_TYPE_JSON_SPLIT_ORIENTED:
            data = parse_json_input(
                json_input=StringIO(as_str),
                orient="split",
                schema=self._input_schema,
            )
        elif content_type == CONTENT_TYPE_JSON_RECORDS_ORIENTED:
            data = parse_json_input(
                json_input=StringIO(as_str),
                orient="records",
                schema=self._input_schema,
            )
        elif content_type == CONTENT_TYPE_JSON_SPLIT_NUMPY:
            data = parse_split_oriented_json_input_to_numpy(as_str)
        else:
            content_type_error_message = (
                "This predictor only supports the following content types, "
                f"{CONTENT_TYPES}. Got '{content_type}'.")
            raise InferenceError(content_type_error_message)

        try:
            raw_predictions = self._model.predict(data)
        except MlflowException as e:
            raise InferenceError(e.message)
        except Exception:
            error_message = (
                "Encountered an unexpected error while evaluating the model. Verify"
                " that the serialized input Dataframe is compatible with the model for"
                " inference.")
            raise InferenceError(error_message)

        result = StringIO()
        predictions_to_json(raw_predictions, result)
        return Response(content=result.getvalue(),
                        media_type="application/json")
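As a rough client-side illustration of how this handler would be exercised, here is a minimal sketch that POSTs a split-oriented DataFrame to the endpoint. The host, port, and route are assumptions for illustration; the content-type value is the one MLflow's scoring server uses for CONTENT_TYPE_JSON_SPLIT_ORIENTED.

import requests  # assumed client-side dependency, not part of the handler
import pandas as pd

df = pd.DataFrame({"zip": ["95120", "95128"], "cost": [10.45, 23.0]})

# Matches the CONTENT_TYPE_JSON_SPLIT_ORIENTED branch of the handler above.
resp = requests.post(
    "http://localhost:8080/invocations",  # hypothetical host and route
    data=df.to_json(orient="split"),
    headers={"Content-Type": "application/json; format=pandas-split"},
)
print(resp.json())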
Example #2
def test_parse_with_schema(pandas_df_with_all_types):
    schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns])
    df = _shuffle_pdf(pandas_df_with_all_types)
    json_str = json.dumps(df.to_dict(orient="split"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str,
                                                orient="split",
                                                schema=schema)
    json_str = json.dumps(df.to_dict(orient="records"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str,
                                                orient="records",
                                                schema=schema)
    assert schema == infer_signature(df[schema.input_names()]).inputs

    # The current behavior of pandas JSON parsing with type hints is odd. In some cases, the
    # types are forced, ignoring overflow and loss of precision:

    bad_df = """{
      "columns":["bad_integer", "bad_float", "bad_string", "bad_boolean"],
      "data":[
        [9007199254740991.0, 1.1,                1, 1.5],
        [9007199254740992.0, 9007199254740992.0, 2, 0],
        [9007199254740994.0, 3.3,                3, "some arbitrary string"]
      ]
    }"""
    schema = Schema([
        ColSpec("integer", "bad_integer"),
        ColSpec("float", "bad_float"),
        ColSpec("float", "good_float"),
        ColSpec("string", "bad_string"),
        ColSpec("boolean", "bad_boolean"),
    ])
    df = pyfunc_scoring_server.parse_json_input(bad_df,
                                                orient="split",
                                                schema=schema)
    # Unfortunately, the current behavior of pandas parse is to force numbers to int32 even if
    # they don't fit:
    assert df["bad_integer"].dtype == np.int32
    assert all(df["bad_integer"] == [-2147483648, -2147483648, -2147483648])

    # The same goes for floats:
    assert df["bad_float"].dtype == np.float32
    assert all(df["bad_float"] == np.array([1.1, 9007199254740992, 3.3],
                                           dtype=np.float32))
    # However, bad_string is recognized as int64:
    assert all(df["bad_string"] == np.array([1, 2, 3], dtype=object))

    # Boolean is forced - zero and the empty string are false, everything else is true:
    assert df["bad_boolean"].dtype == bool
    assert all(df["bad_boolean"] == [True, False, True])
Example #3
    def predict(self, deployment_name, df):
        """
        Predict on the specified deployment using the provided dataframe.

        Compute predictions on the pandas DataFrame ``df`` using the specified deployment.
        Note that the input/output types of this method match those of `mlflow pyfunc predict`
        (we accept a pandas DataFrame as input and return either a pandas DataFrame,
        a pandas Series, or a numpy array as output).

        :param deployment_name: Name of deployment to predict against
        :param df: Pandas DataFrame to use for inference
        :return: A pandas DataFrame, pandas Series, or numpy array
        """
        try:
            service = Webservice(self.workspace, deployment_name)
        except Exception as e:
            raise MlflowException(
                'Failure retrieving deployment to predict against') from e

        # Take in DF, parse to json using split orient
        input_data = _get_jsonable_obj(df, pandas_orient='split')

        if not service.scoring_uri:
            raise MlflowException(
                'Error attempting to call webservice, scoring_uri unavailable. '
                'This could be due to a failed deployment, or the service is not ready yet.\n'
                'Current State: {}\n'
                'Errors: {}'.format(service.state, service.error))

        # Pass split orient json to webservice
        # Take records orient json from webservice
        resp = ClientBase._execute_func(service._webservice_session.post,
                                        service.scoring_uri,
                                        data=json.dumps(
                                            {'input_data': input_data}))

        if resp.status_code == 401:
            if service.auth_enabled:
                service_keys = service.get_keys()
                service._session.headers.update(
                    {'Authorization': 'Bearer ' + service_keys[0]})
            elif service.token_auth_enabled:
                service_token, refresh_token_time = service.get_access_token()
                service._refresh_token_time = refresh_token_time
                service._session.headers.update(
                    {'Authorization': 'Bearer ' + service_token})
            resp = ClientBase._execute_func(service._webservice_session.post,
                                            service.scoring_uri,
                                            data=json.dumps(
                                                {'input_data': input_data}))

        if resp.status_code == 200:
            # Parse records orient json to df
            return parse_json_input(json.dumps(resp.json()), orient='records')
        else:
            raise MlflowException('Failure during prediction:\n'
                                  'Response Code: {}\n'
                                  'Headers: {}\n'
                                  'Content: {}'.format(resp.status_code,
                                                       resp.headers,
                                                       resp.content))
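To make the request and response shapes in predict concrete, here is a small sketch using plain pandas and json in place of the helpers above (_get_jsonable_obj, parse_json_input); the response body is a made-up example.

import json
from io import StringIO
import pandas as pd

df = pd.DataFrame({"zip": ["95120", "95128"], "cost": [10.45, 23.0]})

# Request body: the DataFrame serialized with the "split" orient and wrapped
# under an "input_data" key, mirroring json.dumps({'input_data': input_data}).
request_body = json.dumps({"input_data": json.loads(df.to_json(orient="split"))})

# Response body: predictions come back records-oriented and are parsed back
# into a DataFrame, mirroring parse_json_input(..., orient='records').
response_body = '[{"prediction": 0.1}, {"prediction": 0.9}]'
predictions = pd.read_json(StringIO(response_body), orient="records")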
Example #4
def test_split_oriented_json_to_df():
    # test that datatype for "zip" column is not converted to "int64"
    jstr = '{"columns":["zip","cost","count"],"index":[0,1,2],' \
           '"data":[["95120",10.45,-8],["95128",23.0,-1],["95128",12.1,1000]]}'
    df = pyfunc_scoring_server.parse_json_input(jstr, orient="split")

    assert set(df.columns) == {'zip', 'cost', 'count'}
    assert set(str(dt) for dt in df.dtypes) == {'object', 'float64', 'int64'}
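For contrast, a quick sketch in plain pandas (made-up payload) of what this test guards against: if the zip codes arrived as JSON numbers instead of JSON strings, the column would be inferred as int64.

from io import StringIO
import pandas as pd

# Same shape as the payload above, but with unquoted zip values.
jstr = '{"columns":["zip","cost"],"index":[0,1],"data":[[95120,10.45],[95128,23.0]]}'
df = pd.read_json(StringIO(jstr), orient="split")
print(df["zip"].dtype)  # int64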
Example #5
def test_parse_json_input_split_oriented():
    size = 200
    data = {"col_m": [random_int(0, 1000) for _ in range(size)],
            "col_z": [random_str(4) for _ in range(size)],
            "col_a": [random_int() for _ in range(size)]}
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="split"), orient="split")
    assert all(p1 == p2)
Example #6
def test_parse_json_input_records_oriented():
    size = 20
    data = {"col_m": [random_int(0, 1000) for _ in range(size)],
            "col_z": [random_str(4) for _ in range(size)],
            "col_a": [random_int() for _ in range(size)]}
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="records"), orient="records")
    # "records" orient may shuffle column ordering. Hence comparing each column Series
    for col in data.keys():
        assert all(p1[col] == p2[col])
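An equivalent check, assuming pandas' testing helpers are acceptable here, is assert_frame_equal with check_like=True, which aligns columns by label and so tolerates the reordering that the "records" orient does not guarantee to preserve.

from io import StringIO
import pandas as pd

p1 = pd.DataFrame({"col_m": [1, 2], "col_z": ["ab", "cd"], "col_a": [3, 4]})
p2 = pd.read_json(StringIO(p1.to_json(orient="records")), orient="records")

# check_like=True compares values after aligning columns (and index) by label.
pd.testing.assert_frame_equal(p1, p2, check_like=True)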
Example #7
def test_records_oriented_json_to_df():
    # test that datatype for "zip" column is not converted to "int64"
    jstr = ("["
            '{"zip":"95120","cost":10.45,"score":8},'
            '{"zip":"95128","cost":23.0,"score":0},'
            '{"zip":"95128","cost":12.1,"score":10}'
            "]")
    df = pyfunc_scoring_server.parse_json_input(jstr, orient="records")

    assert set(df.columns) == {"zip", "cost", "score"}
    assert set(str(dt) for dt in df.dtypes) == {"object", "float64", "int64"}