def test_schema_enforcement_named_tensor_schema_1d():
    m = Model()
    input_schema = Schema(
        [
            TensorSpec(np.dtype(np.uint64), (-1,), "a"),
            TensorSpec(np.dtype(np.float32), (-1,), "b"),
        ]
    )
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    pdf = pd.DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"])
    pdf["a"] = pdf["a"].astype(np.uint64)
    pdf["b"] = pdf["a"].astype(np.float32)
    d_inp = {
        "a": np.array(pdf["a"], dtype=np.uint64),
        "b": np.array(pdf["b"], dtype=np.float32),
    }

    # test dataframe input works for 1d tensor specs and input is converted to dict
    res = pyfunc_model.predict(pdf)
    assert _compare_exact_tensor_dict_input(res, d_inp)
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that dictionary works too
    res = pyfunc_model.predict(d_inp)
    assert res == d_inp
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types
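
# ---------------------------------------------------------------------------
# NOTE: the tests in this section reference a few module-level names that are
# not defined here. The definitions below are a minimal sketch reconstructed
# from how the tests use them, not the authoritative versions, and the import
# paths are assumptions that may differ between MLflow versions; they would
# normally sit at the top of the test module.
# ---------------------------------------------------------------------------
import base64
import re

import numpy as np
import pandas as pd
import pytest

from mlflow.exceptions import MlflowException
from mlflow.models import Model
from mlflow.models.signature import ModelSignature
from mlflow.pyfunc import PyFuncModel
from mlflow.types import ColSpec, Schema, TensorSpec
from mlflow.utils.proto_json_utils import _dataframe_from_json, parse_tf_serving_input


class TestModel:
    # Identity model: pyfunc applies schema enforcement to the input and this
    # model returns the enforced input unchanged, so tests can inspect it.
    @staticmethod
    def predict(pdf):
        return pdf


def _compare_exact_tensor_dict_input(d1, d2):
    # Sketch of the comparison helper used by these tests: two dicts of numpy
    # arrays are considered equal when they have the same keys and equal arrays.
    if d1.keys() != d2.keys():
        return False
    return all(np.array_equal(d1[k], d2[k]) for k in d1)
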
def test_parse_tf_serving_dictionary():
    def assert_result(result, expected_result):
        assert result.keys() == expected_result.keys()
        for key in result:
            assert (result[key] == expected_result[key]).all()

    # instances are correctly aggregated to dict of input name -> tensor
    tfserving_input = {
        "instances": [
            {"a": "s1", "b": 1.1, "c": [1, 2, 3]},
            {"a": "s2", "b": 2.2, "c": [4, 5, 6]},
            {"a": "s3", "b": 3.3, "c": [7, 8, 9]},
        ]
    }

    # Without schema
    result = parse_tf_serving_input(tfserving_input)
    expected_result_no_schema = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3], dtype="float64"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int64"),
    }
    assert_result(result, expected_result_no_schema)

    # With schema
    schema = Schema(
        [
            TensorSpec(np.dtype("object"), [-1], "a"),
            TensorSpec(np.dtype("float32"), [-1], "b"),
            TensorSpec(np.dtype("int32"), [-1], "c"),
        ]
    )
    result = parse_tf_serving_input(tfserving_input, schema)
    expected_result_schema = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3], dtype="float32"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int32"),
    }
    assert_result(result, expected_result_schema)

    # input provided as a dict
    tfserving_input = {
        "inputs": {
            "a": ["s1", "s2", "s3"],
            "b": [1.1, 2.2, 3.3],
            "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        }
    }

    # Without schema
    result = parse_tf_serving_input(tfserving_input)
    assert_result(result, expected_result_no_schema)

    # With schema
    result = parse_tf_serving_input(tfserving_input, schema)
    assert_result(result, expected_result_schema)
def test_schema_enforcement_single_named_tensor_schema():
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.uint64), (-1, 2), "a")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0], [1, 1]], dtype=np.uint64),
    }

    # sanity test that dictionary with correct input works
    res = pyfunc_model.predict(inp)
    assert res == inp
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test single np.ndarray input works and is converted to dictionary
    res = pyfunc_model.predict(inp["a"])
    assert res == inp
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test list does not work
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[0, 0], [1, 1]])
    assert "Model is missing inputs ['a']" in str(ex)
def test_tensor_schema_enforcement_no_col_names():
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    test_data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)

    # Can call with numpy array of correct shape
    assert np.array_equal(pyfunc_model.predict(test_data), test_data)

    # Or can call with a dataframe
    assert np.array_equal(pyfunc_model.predict(pd.DataFrame(test_data)), test_data)

    # Can not call with a list
    with pytest.raises(
        MlflowException,
        match="This model contains a tensor-based model signature with no input names",
    ):
        pyfunc_model.predict([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    # Can not call with a dict
    with pytest.raises(
        MlflowException,
        match="This model contains a tensor-based model signature with no input names",
    ):
        pyfunc_model.predict({"blah": test_data})

    # Can not call with a np.ndarray of a wrong shape
    with pytest.raises(
        MlflowException,
        match=re.escape("Shape of input (2, 2) does not match expected shape (-1, 3)"),
    ):
        pyfunc_model.predict(np.array([[1.0, 2.0], [4.0, 5.0]]))

    # Can not call with a np.ndarray of a wrong type
    with pytest.raises(
        MlflowException,
        match="dtype of input uint32 does not match expected dtype float32",
    ):
        pyfunc_model.predict(test_data.astype(np.uint32))

    # Can call with a np.ndarray with more elements along variable axis
    test_data2 = np.array(
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]], dtype=np.float32
    )
    assert np.array_equal(pyfunc_model.predict(test_data2), test_data2)

    # Can not call with an empty ndarray
    with pytest.raises(
        MlflowException,
        match=re.escape("Shape of input () does not match expected shape (-1, 3)"),
    ):
        pyfunc_model.predict(np.ndarray([]))
def test_parse_tf_serving_single_array():
    def assert_result(result, expected_result):
        assert (result == expected_result).all()

    # values for each column are properly converted to a tensor
    arr = [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[3, 2, 1], [6, 5, 4], [9, 8, 7]],
    ]
    tfserving_instances = {"instances": arr}
    tfserving_inputs = {"inputs": arr}

    # Without schema
    instance_result = parse_tf_serving_input(tfserving_instances)
    assert instance_result.shape == (2, 3, 3)
    assert_result(instance_result, np.array(arr, dtype="int64"))

    input_result = parse_tf_serving_input(tfserving_inputs)
    assert input_result.shape == (2, 3, 3)
    assert_result(input_result, np.array(arr, dtype="int64"))

    # Unnamed schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1])])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert_result(instance_result, np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert_result(input_result, np.array(arr, dtype="float32"))

    # Named schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1], "a")])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert isinstance(instance_result, dict)
    assert len(instance_result.keys()) == 1 and "a" in instance_result
    assert_result(instance_result["a"], np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert isinstance(input_result, dict)
    assert len(input_result.keys()) == 1 and "a" in input_result
    assert_result(input_result["a"], np.array(arr, dtype="float32"))
def test_tensor_multi_named_schema_enforcement():
    m = Model()
    input_schema = Schema(
        [
            TensorSpec(np.dtype(np.uint64), (-1, 5), "a"),
            TensorSpec(np.dtype(np.short), (-1, 2), "b"),
            TensorSpec(np.dtype(np.float32), (2, -1, 2), "c"),
        ]
    )
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=np.uint64),
        "b": np.array([[0, 0], [1, 1], [2, 2]], dtype=np.short),
        "c": np.array([[[0, 0], [1, 1]], [[2, 2], [3, 3]]], dtype=np.float32),
    }

    # test that missing column raises
    inp1 = {k: v for k, v in inp.items()}
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp1.pop("b"))
    assert "Model is missing inputs" in str(ex)

    # test that extra column is ignored
    inp2 = {k: v for k, v in inp.items()}
    inp2["x"] = 1

    # test that extra column is removed
    res = pyfunc_model.predict(inp2)
    assert res == {k: v for k, v in inp.items() if k in {"a", "b", "c"}}
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that variable axes are supported
    inp3 = {
        "a": np.array(
            [[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], dtype=np.uint64
        ),
        "b": np.array([[0, 0], [1, 1]], dtype=np.short),
        "c": np.array([[[0, 0]], [[2, 2]]], dtype=np.float32),
    }
    res = pyfunc_model.predict(inp3)
    assert _compare_exact_tensor_dict_input(res, inp3)
    expected_types = dict(zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that type casting is not supported
    inp4 = {k: v for k, v in inp.items()}
    inp4["a"] = inp4["a"].astype(np.int32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp4)
    assert "dtype of input int32 does not match expected dtype uint64" in str(ex)

    # test wrong shape
    inp5 = {
        "a": np.array([[0, 0, 0, 0]], dtype=np.uint),
        "b": np.array([[0, 0], [1, 1]], dtype=np.short),
        "c": np.array([[[0, 0]]], dtype=np.float32),
    }
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp5)
    assert "Shape of input (1, 4) does not match expected shape (-1, 5)" in str(ex)

    # test non-dictionary input
    inp6 = [
        np.array([[0, 0, 0, 0, 0]], dtype=np.uint64),
        np.array([[0, 0], [1, 1]], dtype=np.short),
        np.array([[[0, 0]]], dtype=np.float32),
    ]
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp6)
    assert "Model is missing inputs ['a', 'b', 'c']." in str(ex)

    # test empty ndarray does not work
    inp7 = {k: v for k, v in inp.items()}
    inp7["a"] = np.array([])
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp7)
    assert "Shape of input (0,) does not match expected shape" in str(ex)

    # test dictionary of str -> list does not work
    inp8 = {k: list(v) for k, v in inp.items()}
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp8)
    assert "This model contains a tensor-based model signature with input names" in str(ex)
    assert (
        "suggests a dictionary input mapping input name to a numpy array, but a dict"
        " with value type <class 'list'> was found"
    ) in str(ex)

    # test dataframe input fails at shape enforcement
    pdf = pd.DataFrame(data=[[1, 2, 3]], columns=["a", "b", "c"])
    pdf["a"] = pdf["a"].astype(np.uint64)
    pdf["b"] = pdf["b"].astype(np.short)
    pdf["c"] = pdf["c"].astype(np.float32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    assert "Shape of input (1,) does not match expected shape (-1, 5)" in str(ex)
def test_dataframe_from_json():
    source = pd.DataFrame(
        {
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "float": np.array([1.2, 2.3, 3.4], dtype=np.float32),
            "double": np.array([1.2, 2.3, 3.4], dtype=np.float64),
            "integer": np.array([3, 4, 5], dtype=np.int32),
            "long": np.array([3, 4, 5], dtype=np.int64),
            "binary": [bytes([1, 2, 3]), bytes([4, 5]), bytes([6])],
            "date_string": ["2018-02-03", "1996-03-02", "2021-03-05"],
        },
        columns=[
            "boolean",
            "string",
            "float",
            "double",
            "integer",
            "long",
            "binary",
            "date_string",
        ],
    )

    jsonable_df = pd.DataFrame(source, copy=True)
    jsonable_df["binary"] = jsonable_df["binary"].map(base64.b64encode)

    schema = Schema(
        [
            ColSpec("boolean", "boolean"),
            ColSpec("string", "string"),
            ColSpec("float", "float"),
            ColSpec("double", "double"),
            ColSpec("integer", "integer"),
            ColSpec("long", "long"),
            ColSpec("binary", "binary"),
            ColSpec("string", "date_string"),
        ]
    )
    parsed = _dataframe_from_json(
        jsonable_df.to_json(orient="split"), pandas_orient="split", schema=schema
    )
    assert parsed.equals(source)

    parsed = _dataframe_from_json(
        jsonable_df.to_json(orient="records"), pandas_orient="records", schema=schema
    )
    assert parsed.equals(source)

    # try parsing with tensor schema
    tensor_schema = Schema(
        [
            TensorSpec(np.dtype("bool"), [-1], "boolean"),
            TensorSpec(np.dtype("str"), [-1], "string"),
            TensorSpec(np.dtype("float32"), [-1], "float"),
            TensorSpec(np.dtype("float64"), [-1], "double"),
            TensorSpec(np.dtype("int32"), [-1], "integer"),
            TensorSpec(np.dtype("int64"), [-1], "long"),
            TensorSpec(np.dtype(bytes), [-1], "binary"),
        ]
    )
    parsed = _dataframe_from_json(
        jsonable_df.to_json(orient="split"), pandas_orient="split", schema=tensor_schema
    )
    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)

    parsed = _dataframe_from_json(
        jsonable_df.to_json(orient="records"), pandas_orient="records", schema=tensor_schema
    )
    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)

    # Test parsing with a TensorSchema with a single tensor
    tensor_schema = Schema([TensorSpec(np.dtype("float32"), [-1, 3])])
    source = pd.DataFrame(
        {
            "a": np.array([1, 2, 3], dtype=np.float32),
            "b": np.array([4.1, 5.2, 6.3], dtype=np.float32),
            "c": np.array([7, 8, 9], dtype=np.float32),
        },
        columns=["a", "b", "c"],
    )
    assert source.equals(
        _dataframe_from_json(
            source.to_json(orient="split"), pandas_orient="split", schema=tensor_schema
        )
    )
    assert source.equals(
        _dataframe_from_json(
            source.to_json(orient="records"), pandas_orient="records", schema=tensor_schema
        )
    )